diff --git "a/data/prs.json" "b/data/prs.json"
--- "a/data/prs.json"
+++ "b/data/prs.json"
@@ -1,29225 +1,28211 @@
 [
   {
-    "additions": 18,
-    "author": "JaredforReal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? ### Get the rope operation right Before: NeoX split-half style After: GPT-J/interleaved style(`interleaved=True` same as `is_neox_style=Flase`) the right one ### Get rid of `F.relu` Reason: - `F.relu` works with `ac\u2026",
-    "changed_files": 2,
+    "additions": 10,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "`_register_model_output_pytree_node` was calling set.__contains__ during TorchDynamo tracing, which is unsupported in PyTorch 2.8.0 (ROCm). Added an early return when `torch.compiler.is_compiling()` is True, since pytree nodes are already\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45017",
-    "created_at": "2026-03-26T09:21:10Z",
-    "deletions": 28,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45282",
+    "created_at": "2026-04-07T08:50:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45017/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45017",
+    "files_url": "https://github.com/huggingface/transformers/pull/45282/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45282",
     "labels": [],
     "merged": false,
-    "number": 45017,
-    "review_comments_count": 5,
+    "number": 45282,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[WIP][Fix] GLM 5 set `apply_rotary_pos_emb` to `is_neox_style=False` && remove `F.relu()`",
-    "updated_at": "2026-03-26T10:14:50Z"
+    "title": "[AMD CI] Fix torch.compile/export failures on AMD CI due to untraceable set.__contains__ ",
+    "updated_at": "2026-04-07T09:00:34Z"
   },
   {
-    "additions": 64,
-    "author": "inisis",
+    "additions": 6,
+    "author": "zhang-prog",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "body_excerpt": "### Description This PR fixes a boundary issue in the `_extract_polygon_points_by_masks` method of PP-DocLayoutV3. When running inference with a low confidence threshold, or due to coordinate clipping during scaling, the extracted `cropped\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45016",
-    "created_at": "2026-03-26T09:09:41Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45281",
+    "created_at": "2026-04-07T08:49:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45016/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45016",
+    "files_url": "https://github.com/huggingface/transformers/pull/45281/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45281",
     "labels": [],
     "merged": false,
-    "number": 45016,
+    "number": 45281,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: glm5 inference bug",
-    "updated_at": "2026-03-26T09:48:35Z"
+    "title": "Fix resize failure caused by zero-sized masks in PP-DocLayoutV3",
+    "updated_at": "2026-04-07T08:54:58Z"
   },
   {
-    "additions": 55,
-    "author": "pnehete23",
+    "additions": 2654,
+    "author": "marvinzh",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `KeyError` in `_can_set_attn_implementation` and `_can_set_experts_implementation` when a model's module is absent from `sys.modules`. Fixes #45003 ## Root Cause Both `_can_set_attn_implementation` (line 19\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? add Qianfan-OCR model definition - `QianfanOCRForConditionalGeneration` - image-text to text model definition - `QianfanOCRModel` - backbone of image-text to text model without lm heads - `QianfanOCRProcessor` - tex\u2026",
+    "changed_files": 19,
+    "cluster_id": "cluster-42504-3",
+    "cluster_ids": [
+      "cluster-42504-3"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45280",
+    "created_at": "2026-04-07T06:49:34Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45280/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45280",
+    "labels": [],
+    "merged": false,
+    "number": 45280,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "add Qianfan-OCR model definition",
+    "updated_at": "2026-04-07T07:03:48Z"
+  },
+  {
+    "additions": 43,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45015",
-    "created_at": "2026-03-26T08:52:10Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45279",
+    "created_at": "2026-04-07T06:40:35Z",
     "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45015/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45015",
+    "files_url": "https://github.com/huggingface/transformers/pull/45279/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45279",
     "labels": [],
     "merged": false,
-    "number": 45015,
+    "number": 45279,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: guard sys.modules access in _can_set_attn/experts_implementation",
-    "updated_at": "2026-03-26T09:10:14Z"
+    "title": "add expert parallelism for gemma-4-26B-A4B-it",
+    "updated_at": "2026-04-07T07:08:41Z"
   },
   {
-    "additions": 16,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #30674 refactors the way we obtain CircleCI test files to run for each job. It always puts [\"tests\"] for `tests_hub`, so each commit of each PR will run it, no matter if there is any change to codebase. Let's reduce\u2026",
+    "additions": 2,
+    "author": "kamalrajkannan78",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - Gemma3ForConditionalGeneration.forward & Gemma3ForSequenceClassification.forward calls self.model() without return_dict=True, so @can_return_tuple silently converts the output to a plain tuple, causing outputs.pas\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45014",
-    "created_at": "2026-03-26T08:52:04Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45014/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45014",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45277",
+    "created_at": "2026-04-07T05:24:43Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45277/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45277",
     "labels": [],
-    "merged": true,
-    "number": 45014,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Don't run `tests_hub` if no tests found",
-    "updated_at": "2026-03-26T09:32:39Z"
+    "merged": false,
+    "number": 45277,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix AttributeError in Gemma3ForConditionalGeneration and Gemma3ForSequenceClassification when config.return_dict=False",
+    "updated_at": "2026-04-07T06:11:14Z"
   },
   {
-    "additions": 256,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Added a new checker that will `import transformers` and count the number of imported module. The checker will error out when it goes over a threshold - for instance if our code imports a lib like `torch` not lazily\u2026",
+    "additions": 9,
+    "author": "avarga1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Problem `AutoConfig.from_pretrained(\"baidu/ERNIE-4.5-VL-28B-A3B-Paddle\", trust_remote_code=True)` raises errors that prevent the model from loading at all. Three separate bugs compound each other: ### Bug 1 \u2014 `model_type` mismatch (KeyE\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45013",
-    "created_at": "2026-03-26T07:47:23Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45275",
+    "created_at": "2026-04-07T02:22:26Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45013/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45013",
+    "files_url": "https://github.com/huggingface/transformers/pull/45275/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45275",
     "labels": [],
     "merged": false,
-    "number": 45013,
+    "number": 45275,
     "review_comments_count": 0,
     "state": "open",
-    "title": "feature: added import complexity checker",
-    "updated_at": "2026-03-26T07:57:13Z"
+    "title": "fix(ernie4_5_vl_moe): resolve three config loading failures for ERNIE-4.5-VL MoE models",
+    "updated_at": "2026-04-07T02:57:59Z"
   },
   {
-    "additions": 155,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Cache to speed up ast walks, and ast tweak",
-    "changed_files": 2,
+    "additions": 215,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? 1. Fix: CUDA graph reuse for FA2 continuous batching was wrongly keyed causing quality collapse for specific configuration CUDA graph reuse used the wrong key: replay reuse depended on padded tensor sizes, but FA va\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45012",
-    "created_at": "2026-03-26T07:10:45Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45274",
+    "created_at": "2026-04-07T01:43:12Z",
+    "deletions": 36,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45012/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45012",
+    "files_url": "https://github.com/huggingface/transformers/pull/45274/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45274",
     "labels": [],
     "merged": false,
-    "number": 45012,
+    "number": 45274,
     "review_comments_count": 0,
     "state": "open",
-    "title": "refactor: added cache in check_repo",
-    "updated_at": "2026-03-26T07:21:25Z"
+    "title": "Fix CB Accuracy Regression under FA2",
+    "updated_at": "2026-04-07T02:16:21Z"
   },
   {
-    "additions": 8,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh Hi, can you help review? Thx!",
+    "additions": 2,
+    "author": "excepshenal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Liger kernel unnecessarily materializes logits in VRAM during eval with `prediction_loss_only=True`, causing OOM. We explicitly tell Liger to `skip_logits`. <!-- Congratulations! You've made it this far! You're not\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45011",
-    "created_at": "2026-03-26T06:27:09Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45273",
+    "created_at": "2026-04-06T21:11:21Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45011/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45011",
+    "files_url": "https://github.com/huggingface/transformers/pull/45273/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45273",
     "labels": [],
     "merged": false,
-    "number": 45011,
+    "number": 45273,
     "review_comments_count": 0,
     "state": "open",
-    "title": "skip 2 invalid test cases for pi0 model",
-    "updated_at": "2026-03-26T06:28:28Z"
+    "title": "fix: liger unnecessarily materializes logits in VRAM during eval, causing OOM",
+    "updated_at": "2026-04-06T21:34:26Z"
   },
   {
-    "additions": 75,
-    "author": "dagecko",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Fix: CI/CD Security Vulnerabilities in GitHub Actions Hi! [Runner Guard](https://github.com/Vigilant-LLC/runner-guard), an open-source CI/CD security scanner by [Vigilant Cyber Security](https://www.vigilantdefense.com), identified secu\u2026",
-    "changed_files": 20,
+    "additions": 6,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "Cc @ydshieh",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45010",
-    "created_at": "2026-03-26T06:19:29Z",
-    "deletions": 71,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45272",
+    "created_at": "2026-04-06T20:39:29Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45010/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45010",
+    "files_url": "https://github.com/huggingface/transformers/pull/45272/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45272",
     "labels": [],
     "merged": false,
-    "number": 45010,
+    "number": 45272,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: pin 69 unpinned action(s),extract 2 unsafe expression(s) to env vars",
-    "updated_at": "2026-03-26T06:19:29Z"
+    "title": "Fix redundant logic in video processing SmolVLM",
+    "updated_at": "2026-04-06T20:49:13Z"
   },
   {
-    "additions": 319,
-    "author": "tarekziade",
+    "additions": 90,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch improves the docstring checker implementation (redundant AST walks) and adds cache. For the AST calls, 2.3x speedup check_docstrings.py --check_all on my M1: - before : 29.3s - after: 12.6s",
+    "body_excerpt": "adds a separate vlm contribution doc for more visibility instead of being hidden in the Contribute to Transformers doc, and integration tests are covered in #45152",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45009",
-    "created_at": "2026-03-26T05:31:41Z",
-    "deletions": 43,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45271",
+    "created_at": "2026-04-06T18:56:42Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45009/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45009",
+    "files_url": "https://github.com/huggingface/transformers/pull/45271/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45271",
     "labels": [],
     "merged": false,
-    "number": 45009,
-    "review_comments_count": 12,
+    "number": 45271,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "refactor: speed up docstring checker",
-    "updated_at": "2026-03-26T10:10:44Z"
+    "title": "[docs] vlm addition",
+    "updated_at": "2026-04-06T19:07:27Z"
   },
   {
-    "additions": 382,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fix type annotation bugs across config classes that cause `@strict` validation errors from `huggingface_hub`. ### Bool fields mistyped as `int` (22 fixes) Fields with boolean defaults (`True`/`False`) were annotated as `int` ins\u2026",
-    "changed_files": 198,
+    "additions": 162,
+    "author": "madhav1k",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Introduce logging of individual loss components when models return a dict of losses. - Add TrainingArguments.logging_loss_components flag to enable/disable this behavior. - Track per-component running sums with _tr_loss_components and aggr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45007",
-    "created_at": "2026-03-25T23:12:53Z",
-    "deletions": 382,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45270",
+    "created_at": "2026-04-06T18:38:51Z",
+    "deletions": 7,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45007/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45007",
+    "files_url": "https://github.com/huggingface/transformers/pull/45270/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45270",
     "labels": [],
     "merged": false,
-    "number": 45007,
+    "number": 45270,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: correct type annotations across config classes for @strict validation",
-    "updated_at": "2026-03-25T23:14:00Z"
+    "title": "[Trainer] Support multi-loss component logging",
+    "updated_at": "2026-04-06T18:59:17Z"
   },
   {
     "additions": 3,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44869 Adds a bounds check in `_split_tokens_on_unicode()` in `tokenization_whisper.py` to handle trailing Unicode replacement characters (U+FFFD) at the end of decoded token streams without crashing with `IndexError`. ##\u2026",
+    "author": "ryota-komatsu",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix minor typos in `src/transformers/utils/output_capturing.py`: - `install_output_capuring_hook` \u2192 `install_output_capturing_hook` (lines 98, 147) - `Tis` \u2192 `This` (line 152) ## Before submitting - [x] This PR fixe\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45269",
+    "created_at": "2026-04-06T18:01:42Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45269/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45269",
+    "labels": [],
+    "merged": false,
+    "number": 45269,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix typos in src/transformers/utils/output_capturing.py",
+    "updated_at": "2026-04-06T20:49:06Z"
+  },
+  {
+    "additions": 3,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix `Qwen2IntegrationTest`: - `test_speculative_generation`: - `0c89522f`: #43794 changed seed, so the actual output changed, but the expected output is not updated (cc @tarekziade more attention next time \ud83d\ude04 ) - `af\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45268",
+    "created_at": "2026-04-06T18:01:38Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45268/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45268",
+    "labels": [],
+    "merged": true,
+    "number": 45268,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Fix `Qwen2IntegrationTest`",
+    "updated_at": "2026-04-06T19:18:20Z"
+  },
+  {
+    "additions": 108,
+    "author": "KetanP1618",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds a missing docstring to the `FFN.forward` method in DistilBERT. The `FFN.forward` method in `modeling_distilbert.py` had no documentation at all. This PR adds proper Args and Returns sections following the exis\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45006",
-    "created_at": "2026-03-25T23:03:00Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/45006/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45006",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45267",
+    "created_at": "2026-04-06T17:59:13Z",
+    "deletions": 39,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45267/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45267",
     "labels": [],
     "merged": false,
-    "number": 45006,
+    "number": 45267,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: prevent IndexError in Whisper timestamp decode on trailing replacement char",
-    "updated_at": "2026-03-25T23:05:18Z"
+    "title": "Add docstring to FFN.forward in DistilBERT",
+    "updated_at": "2026-04-06T18:00:22Z"
   },
   {
-    "additions": 8,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR (grouped them together since they share related root causes OR the code changes were extremely minimal and didn't warrant separate PRs): \u2192 **Phi-3**\u2026",
-    "changed_files": 2,
+    "additions": 125,
+    "author": "KetanP1618",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds missing docstrings to two undocumented `forward` methods in the ALBERT model: - `AlbertMLMHead.forward` - Added Args and Returns sections - `AlbertSOPHead.forward` - Added Args and Returns sections Both method\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45004",
-    "created_at": "2026-03-25T19:58:57Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45266",
+    "created_at": "2026-04-06T17:41:14Z",
+    "deletions": 41,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45004/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45004",
+    "files_url": "https://github.com/huggingface/transformers/pull/45266/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45266",
     "labels": [],
     "merged": false,
-    "number": 45004,
+    "number": 45266,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(testing): Fix Parakeet, Evolla, Pi0, and Phi-3 test failures on main CI",
-    "updated_at": "2026-03-26T05:57:34Z"
+    "title": "Add docstrings to AlbertMLMHead and AlbertSOPHead forward methods",
+    "updated_at": "2026-04-06T17:42:20Z"
   },
   {
     "additions": 1,
-    "author": "hmellor",
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "`None` is a valid value that can be used to disable chunked attention in `DynamicCache` and Flex Attention. hf.co/morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct is an example of a checkpoint which does this.",
+    "body_excerpt": "# What does this PR do? **Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.** (#45248) breaks when running on torch 2.8 or older ...",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45002",
-    "created_at": "2026-03-25T17:40:14Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45263",
+    "created_at": "2026-04-06T10:06:40Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45002/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45002",
+    "files_url": "https://github.com/huggingface/transformers/pull/45263/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45263",
     "labels": [],
     "merged": true,
-    "number": 45002,
+    "number": 45263,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix type hint for `attention_chunk_size` in `Llama4TextConfig`",
-    "updated_at": "2026-03-25T20:42:11Z"
+    "title": "Add `hasattr(torch.backends.cudnn, \"conv\")` to `conftest.py`",
+    "updated_at": "2026-04-06T19:46:18Z"
   },
   {
-    "additions": 17,
-    "author": "Sai-Suraj-27",
+    "additions": 1,
+    "author": "lowzhao",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? For [torch>=2.10.0](https://docs.pytorch.org/docs/2.10/generated/torch.nn.functional.grouped_mm.html#torch-nn-functional-grouped-mm), the minimum CUDA compute capability requirement for `torch.nn.functional.grouped_\u2026",
+    "body_excerpt": "# What does this PR do? Fix docstring spelling mistake TokenizersBackend.convert_to_native_format. ```python @classmethod def convert_to_native_format(cls, trust_remote_code=False, **kwargs): \"\"\"s <---- additional s ``` Likely caused by mi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45001",
-    "created_at": "2026-03-25T17:00:28Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45262",
+    "created_at": "2026-04-06T08:41:40Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45001/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45001",
+    "files_url": "https://github.com/huggingface/transformers/pull/45262/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45262",
     "labels": [],
-    "merged": false,
-    "number": 45001,
+    "merged": true,
+    "number": 45262,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add cuda compatibility check for using `grouped_mm`",
-    "updated_at": "2026-03-26T00:24:20Z"
+    "state": "closed",
+    "title": "doc: fix TokenizersBackend.convert_to_native_format docstring",
+    "updated_at": "2026-04-06T16:32:44Z"
   },
   {
-    "additions": 21,
-    "author": "zucchini-nlp",
+    "additions": 22,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, after https://github.com/huggingface/transformers/pull/44976 users will be seeing a `missing_weights - lm_head not found` error even though the model doesn't use an lm head On the way also deleted unne\u2026",
-    "changed_files": 8,
+    "body_excerpt": "# What this PR does It's working, see https://github.com/huggingface/transformers/actions/runs/24025915210 (the failing job in this PR is because the workflow needs to be on `main` to be effective).",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/45000",
-    "created_at": "2026-03-25T16:28:55Z",
-    "deletions": 109,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45261",
+    "created_at": "2026-04-06T07:52:29Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/45000/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/45000",
+    "files_url": "https://github.com/huggingface/transformers/pull/45261/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45261",
     "labels": [],
-    "merged": false,
-    "number": 45000,
+    "merged": true,
+    "number": 45261,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Embedding VLMs don't need a head",
-    "updated_at": "2026-03-25T18:53:51Z"
+    "state": "closed",
+    "title": "empty",
+    "updated_at": "2026-04-06T09:24:59Z"
   },
   {
-    "additions": 1002,
-    "author": "itazap",
+    "additions": 18,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Summary PR #43514 refactored `_preprocess` to pass `resample=resample` to `resize`, but the `resize` method in `SmolVLMVideoProcessor` still had `interpolation` as its parameter name. The `resample` kwarg was silently swallowed by `**kw\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44999",
-    "created_at": "2026-03-25T16:21:37Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45258",
+    "created_at": "2026-04-06T05:09:09Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45258/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45258",
+    "labels": [],
+    "merged": true,
+    "number": 45258,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Fix `SmolVLM` video processor `resize` using wrong interpolation after backend refactor",
+    "updated_at": "2026-04-06T20:40:22Z"
+  },
+  {
+    "additions": 269,
+    "author": "lucianommartins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# [Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility ## What does this PR do? Rewrites the `_patch_template_for_openai_tool_role()` function in `convert_gemma4_weights.py` to fully support OpenAI Chat Completi\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45257",
+    "created_at": "2026-04-05T22:07:53Z",
+    "deletions": 1,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44999/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44999",
+    "files_url": "https://github.com/huggingface/transformers/pull/45257/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45257",
     "labels": [],
     "merged": false,
-    "number": 44999,
+    "number": 45257,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-26T01:38:21Z"
+    "title": "[Gemma4] Fix chat template and stop tokens for OpenAI tool calling compatibility",
+    "updated_at": "2026-04-06T15:33:35Z"
   },
   {
-    "additions": 1179,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
-    "changed_files": 4,
+    "additions": 39,
+    "author": "zozo123",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary When saving a Qwen3.5 VL model via `save_pretrained`, the `revert_weight_conversion` for `qwen3_5_text` replaces a leading `model.` segment. This wrongly matches keys that already start with `model.language_model.` on composite\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44997",
-    "created_at": "2026-03-25T14:23:13Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45256",
+    "created_at": "2026-04-05T19:00:26Z",
     "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44997/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44997",
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45256/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45256",
     "labels": [],
     "merged": false,
-    "number": 44997,
+    "number": 45256,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-25T14:35:45Z"
+    "title": "fix: skip qwen3_5_text checkpoint remap for nested VL language_model",
+    "updated_at": "2026-04-06T18:50:06Z"
   },
   {
-    "additions": 260,
-    "author": "3outeille",
+    "additions": 0,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "- TODO: - fix failing tests due to API change - make sure our `fsdp2` is not triggered if `accelerate` is on - Introduce `DistributedConfig` - `DistributedConfig(tp_size=2, fsdp_size=2) # plans default to \"auto\"` replaces passing separate\u2026",
-    "changed_files": 9,
+    "body_excerpt": "# What does this PR do? - `tests/models/qwen2/test_modeling_qwen2.py::Qwen2IntegrationTest::test_model_450m_logits`: - failed due to 6217adc6c8f0be7b5374e6a46129ad2214e4c6ed - `tests/models/smolvlm/test_modeling_smolvlm.py::SmolVLMForCondi\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45254",
+    "created_at": "2026-04-05T18:20:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45254/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45254",
+    "labels": [],
+    "merged": false,
+    "number": 45254,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix more integration tests for important models",
+    "updated_at": "2026-04-06T05:23:46Z"
+  },
+  {
+    "additions": 33,
+    "author": "Charly21r",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a bug where `use_cache=False` produces garbage logits in Gemma 4 models due to broken KV sharing between layers. Fixes #45242 ## Root cause of the issue Gemma 4 introduces two architectural features not presen\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44996",
-    "created_at": "2026-03-25T14:20:25Z",
-    "deletions": 256,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45253",
+    "created_at": "2026-04-05T18:12:08Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44996/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44996",
+    "files_url": "https://github.com/huggingface/transformers/pull/45253/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45253",
     "labels": [],
     "merged": false,
-    "number": 44996,
+    "number": 45253,
     "review_comments_count": 0,
     "state": "open",
-    "title": " from_pretrained distributed refactor (FSDP2 + TP)",
-    "updated_at": "2026-03-26T10:17:40Z"
+    "title": "Fix Gemma4 `use_cache=False` producing bad logits",
+    "updated_at": "2026-04-06T07:16:38Z"
   },
   {
-    "additions": 3639,
-    "author": "itazap",
+    "additions": 4,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 8,
-    "cluster_id": "cluster-43098-11",
-    "cluster_ids": [
-      "cluster-43098-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44994",
-    "created_at": "2026-03-25T14:02:50Z",
-    "deletions": 242,
+    "body_excerpt": "# What does this PR do? #43166 used `torch.set_float32_matmul_precision(\"high\")` which causes (likely) TF32 being used > \u201chigh\u201d, float32 matrix multiplications either use the TensorFloat32 datatype (10 mantissa bits explicitly stored) or t\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45252",
+    "created_at": "2026-04-05T16:51:29Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44994/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44994",
+    "files_url": "https://github.com/huggingface/transformers/pull/45252/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45252",
     "labels": [],
-    "merged": false,
-    "number": 44994,
+    "merged": true,
+    "number": 45252,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add sarvam model",
-    "updated_at": "2026-03-25T14:04:38Z"
+    "title": "Fix unexpected TF32 being enabled in testing",
+    "updated_at": "2026-04-05T17:32:09Z"
   },
   {
-    "additions": 303,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `make check-repo` can be quite slow, this patch adds file-level cache to speed up checks. We get up to a 27x speedup - cold cache : 46s - warm cache : 1.6s",
-    "changed_files": 3,
+    "additions": 91,
+    "author": "balgaly",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Problem \\ orch.multinomial\\ rejects last dimensions \\>= 2**24\\. Beam search with \\do_sample=True\\ builds a flat distribution of size \\ um_beams * vocab_size\\, which can exceed that limit (e.g. large beams + ~164k vocab), crashing during\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44992",
-    "created_at": "2026-03-25T11:40:46Z",
-    "deletions": 17,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45251",
+    "created_at": "2026-04-05T15:38:58Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44992/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44992",
+    "files_url": "https://github.com/huggingface/transformers/pull/45251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45251",
     "labels": [],
     "merged": false,
-    "number": 44992,
+    "number": 45251,
     "review_comments_count": 0,
     "state": "open",
-    "title": "refactoring: speedup static checks",
-    "updated_at": "2026-03-26T07:12:21Z"
+    "title": "fix(generation): beam sample when num_beams * vocab_size exceeds multinomial limit",
+    "updated_at": "2026-04-05T15:53:23Z"
   },
   {
-    "additions": 8,
-    "author": "ArthurZucker",
+    "additions": 16,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - BC for check model inputs",
+    "body_excerpt": "# What does this PR do? PR #42428 change the way to enable / disable torch's TF32 using torch new API. It turns out set > torch.backends.fp32_precision = False would still have > torch.backends.cudnn.conv.fp32_precision = \"tf32\" > torch.ba\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44990",
-    "created_at": "2026-03-25T10:26:20Z",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45248",
+    "created_at": "2026-04-05T07:51:44Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44990/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44990",
+    "files_url": "https://github.com/huggingface/transformers/pull/45248/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45248",
     "labels": [],
     "merged": true,
-    "number": 44990,
+    "number": 45248,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "More small vllm fixes",
-    "updated_at": "2026-03-25T13:05:44Z"
+    "title": "Fix tf32 issue: set `torch.backends.cudnn.conv.fp32_precision` explicitly.",
+    "updated_at": "2026-04-05T10:35:41Z"
   },
   {
-    "additions": 1,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "- Steps breakdown: - FSDP + TP: - https://github.com/huggingface/transformers/pull/44083 - [Request](https://github.com/huggingface/transformers/pull/44083#pullrequestreview-3975401342) to use our loading method https://github.com/huggingf\u2026",
+    "additions": 3,
+    "author": "gagandhakrey",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "\u2026 validation # What does this PR do? Problem The invert_attention_mask function in src/transformers/modeling_utils.py crashed with an UnboundLocalError when given an encoder_attention_mask shape that wasn't exactly 2D or 3D. Because it onl\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44989",
-    "created_at": "2026-03-25T09:10:02Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44989/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44989",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45247",
+    "created_at": "2026-04-05T03:45:23Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45247/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45247",
     "labels": [],
     "merged": false,
-    "number": 44989,
+    "number": 45247,
     "review_comments_count": 0,
     "state": "open",
-    "title": "\ud83d\udea8 Distributed training API",
-    "updated_at": "2026-03-25T16:16:45Z"
+    "title": "Fix UnboundLocalError in invert_attention_mask by adding proper shape\u2026",
+    "updated_at": "2026-04-05T03:51:22Z"
   },
   {
-    "additions": 584,
-    "author": "tarekziade",
+    "additions": 0,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds Rule 14 ``` if _tied_weights_keys is present and non-empty in modeling -> Config MUST contain the tie_word_embeddings field ```",
-    "changed_files": 9,
+    "body_excerpt": "# What does this PR do? Let's CI go great!!!!",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44988",
-    "created_at": "2026-03-25T07:08:20Z",
-    "deletions": 2,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45244",
+    "created_at": "2026-04-04T18:52:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44988/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44988",
+    "files_url": "https://github.com/huggingface/transformers/pull/45244/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45244",
     "labels": [],
     "merged": false,
-    "number": 44988,
-    "review_comments_count": 1,
+    "number": 45244,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "typing: add rule 14 - checks for tie_word_embeddings presence",
-    "updated_at": "2026-03-26T09:52:02Z"
+    "title": "Let's CI go great",
+    "updated_at": "2026-04-04T19:59:28Z"
   },
   {
-    "additions": 0,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44855 On Python 3.13, placing a `# Copied from` comment between `@torch.jit.script` and the function definition causes an `IndentationError`. This happens because `torch.jit.script` calls `inspect.getsource()` followed by\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Use torch 2.11 for our (daily) CI since it's released for 2 weeks already. For CircleCI, we need to fix something regarding `torchvision.io.read_video`. For daily CI, torch 2.11 doesn't cause issues (for those `torc\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44986",
-    "created_at": "2026-03-25T03:18:31Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45243",
+    "created_at": "2026-04-04T18:09:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44986/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44986",
+    "files_url": "https://github.com/huggingface/transformers/pull/45243/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45243",
     "labels": [],
     "merged": true,
-    "number": 44986,
+    "number": 45243,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: remove Copied from comments between @torch.jit.script and def for Python 3.13 compat",
-    "updated_at": "2026-03-25T13:39:54Z"
+    "title": "Nvidia CI with `torch 2.11`",
+    "updated_at": "2026-04-04T18:48:45Z"
   },
   {
-    "additions": 2,
-    "author": "Krishnachaitanyakc",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44913 When creating a `GPTNeoXConfig` (or `GPTNeoXJapaneseConfig`) with a non-default `rotary_pct`, the value is lost after a `save_pretrained` / `from_pretrained` round-trip. This happens because `convert_rope_params_to_\u2026",
+    "additions": 523,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? After the series of fixes in other previous PRs, we can now update the tiny model creation script. This update makes the script running without any failure, just 10 warnings. There are many # TODO, some of them may\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44985",
-    "created_at": "2026-03-25T02:15:04Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45241",
+    "created_at": "2026-04-04T12:30:35Z",
+    "deletions": 164,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44985/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44985",
+    "files_url": "https://github.com/huggingface/transformers/pull/45241/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45241",
     "labels": [],
-    "merged": false,
-    "number": 44985,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "fix: preserve rotary_pct across save/load cycle in GPTNeoX configs",
-    "updated_at": "2026-03-25T13:46:44Z"
+    "merged": true,
+    "number": 45241,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update tiny model creation script",
+    "updated_at": "2026-04-04T17:19:34Z"
   },
   {
-    "additions": 2,
-    "author": "Butanium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? `maybe_autocast` calls `torch.is_autocast_enabled(device_type)` which raises a `RuntimeError` when `device_type` is `\"meta\"`: ``` RuntimeError: unknown device type for autocast in get_autocast_dispatch_key_from_dev\u2026",
+    "additions": 4,
+    "author": "shhKnight30",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Type checking for `PreTrainedConfig` subclasses broke in v5.4.0 and this fixes it. The culprit is `wrap_init_to_accept_kwargs` \u2014 it swaps out the dataclass-generated `__init__` with a `(**kwargs: Any)` wrapper at r\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44984",
-    "created_at": "2026-03-25T01:39:23Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45240",
+    "created_at": "2026-04-04T10:29:57Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44984/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44984",
+    "files_url": "https://github.com/huggingface/transformers/pull/45240/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45240",
     "labels": [],
-    "merged": true,
-    "number": 44984,
+    "merged": false,
+    "number": 45240,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix `maybe_autocast` crashing on meta device tensors",
-    "updated_at": "2026-03-25T17:45:03Z"
+    "state": "open",
+    "title": "fix: restore mypy type checking for PreTrainedConfig subclasses (#45071)",
+    "updated_at": "2026-04-07T02:32:17Z"
   },
   {
-    "additions": 29,
-    "author": "Hyungkeun-Park-Nota",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `save_pretrained()` failure for GPT-OSS models loaded with `Mxfp4Config(dequantize=True)`. When Triton/kernels are unavailable, transformers automatically falls back to `dequantize=True`, converting MXFP4 wei\u2026",
-    "changed_files": 2,
+    "additions": 28,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We have introduced `CausalLMModelTest` for some time, but haven't update `get_test_info.py` accordingly, which causes some issues, in particularly for tiny model creation, regarding the part of the attribute `all_mo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44983",
-    "created_at": "2026-03-25T01:19:59Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45238",
+    "created_at": "2026-04-04T07:25:15Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44983/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44983",
+    "files_url": "https://github.com/huggingface/transformers/pull/45238/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45238",
     "labels": [],
-    "merged": false,
-    "number": 44983,
+    "merged": true,
+    "number": 45238,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: implement Mxfp4Dequantize.reverse_op for save_pretrained support",
-    "updated_at": "2026-03-26T02:23:28Z"
+    "state": "closed",
+    "title": "Update `get_test_info.py` (related to tiny model creation)",
+    "updated_at": "2026-04-04T07:40:21Z"
   },
   {
-    "additions": 108,
-    "author": "AkshajKashyap",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43039 ## What does this PR do? When `prediction_loss_only=True` during evaluation and `use_liger_kernel=True`, `Trainer.prediction_step` now passes `skip_logits=True` to the model forward if the forward signature supports it and lab\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes the bug in `resize_token_embeddings` (may occur python3.10 or after). Quick reproduce: ``` from transformers import AutoModelForMaskedLM mdl = AutoModelForMaskedLM.from_pretrained(\"bert-base-uncased\") f = mdl.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44981",
-    "created_at": "2026-03-25T00:38:02Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45236",
+    "created_at": "2026-04-04T07:05:53Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44981/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44981",
+    "files_url": "https://github.com/huggingface/transformers/pull/45236/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45236",
     "labels": [],
     "merged": false,
-    "number": 44981,
+    "number": 45236,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Trainer: set skip_logits for loss-only eval when liger enabled",
-    "updated_at": "2026-03-25T22:07:33Z"
+    "state": "closed",
+    "title": "resize_token_embeddings does not resize lm_head",
+    "updated_at": "2026-04-07T00:55:39Z"
   },
   {
-    "additions": 6,
+    "additions": 102,
     "author": "kallewoof",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Pre-patch unnecessarily breaks merging a LoRA adapter with a model using CUDA_VISIBLE_DEVICES= e.g. when VRAM is insufficient. It also breaks non-cuda machine operations (such as merging). # What does this PR do? This PR un-breaks `CUDA_VI\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? This PR adds a tiny \"agnostic.gpu\" utility that is meant to allow easy replacing of unnecessarily hard-coded vendor-specific code. The code does not use `torch.accelerator` as it is still considered experimental, bu\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44980",
-    "created_at": "2026-03-24T23:50:07Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44980/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44980",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45235",
+    "created_at": "2026-04-04T06:08:22Z",
+    "deletions": 13,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45235/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45235",
     "labels": [],
     "merged": false,
-    "number": 44980,
+    "number": 45235,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "bug-fix: do not assume torch.cuda is available when setting up norm values, even if flash linear attention is available",
-    "updated_at": "2026-03-25T23:17:21Z"
+    "state": "open",
+    "title": "feat/rfc/poc: Agnostic GPU",
+    "updated_at": "2026-04-04T13:17:00Z"
   },
   {
-    "additions": 492,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Introduces `src/transformers/module_fusion.py`, a utility for fusing adjacent submodules in a model into a single FusedModule that executes them as a chain in one forward pass. The key components are: - `RegistryCol\u2026",
-    "changed_files": 2,
+    "additions": 30,
+    "author": "pdufour",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This fixes an issue with the padding logic in the forward pass for timesfm not being Onnx export compatible. Specifically this condition: ``` if input_len < context_len: ``` Will give this error when you try to expo\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44979",
-    "created_at": "2026-03-24T22:33:31Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45233",
+    "created_at": "2026-04-04T00:15:04Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44979/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44979",
+    "files_url": "https://github.com/huggingface/transformers/pull/45233/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45233",
     "labels": [],
     "merged": false,
-    "number": 44979,
+    "number": 45233,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Module Fusion API",
-    "updated_at": "2026-03-25T21:01:32Z"
+    "title": "feat: make timesfm2_5 onnx export compatible",
+    "updated_at": "2026-04-05T01:00:07Z"
   },
   {
-    "additions": 4,
-    "author": "cjkindel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? `_can_set_attn_implementation` and `_can_set_experts_implementation` both do a direct subscript lookup into `sys.modules`: ```python class_module = sys.modules[cls.__module__] ``` If the module is not registered und\u2026",
-    "changed_files": 1,
+    "additions": 439,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds docs for static model rules so model contributors are aware of expectations > I wonder if it wouldn't make sense to auto generate that portion of the doc automatically added from @tarekziade feedback: - reformats `format_rule_details(\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44978",
-    "created_at": "2026-03-24T21:01:11Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45232",
+    "created_at": "2026-04-03T22:41:22Z",
+    "deletions": 146,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44978/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44978",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45232/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45232",
+    "labels": [],
     "merged": false,
-    "number": 44978,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle absent sys.modules entry in modeling_utils",
-    "updated_at": "2026-03-25T18:28:53Z"
+    "number": 45232,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "[docs] static model rules",
+    "updated_at": "2026-04-07T08:24:23Z"
   },
   {
-    "additions": 2,
-    "author": "hmellor",
+    "additions": 20,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "- Adds a type hint to `ModernVBertForMaskedLM.__init__` - Removes `tie_word_embeddings` from `Qwen2VLTextConfig` (and therefore also `Qwen2_5_VLTextConfig`) because it's not valid for these models - Remove hack from `ColQwen2Config` (and t\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? All of this are trivial ... (maybe except \"evolla\")",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44976",
-    "created_at": "2026-03-24T19:26:33Z",
-    "deletions": 10,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45228",
+    "created_at": "2026-04-03T18:17:29Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44976/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44976",
+    "files_url": "https://github.com/huggingface/transformers/pull/45228/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45228",
     "labels": [],
     "merged": true,
-    "number": 44976,
-    "review_comments_count": 3,
+    "number": 45228,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tie_word_embedding issues with `Qwen2VL`",
-    "updated_at": "2026-03-24T20:55:15Z"
+    "title": "More fix for tiny model creation",
+    "updated_at": "2026-04-03T19:25:51Z"
   },
   {
-    "additions": 6971,
-    "author": "philippguevorguian",
+    "additions": 3,
+    "author": "akhilc08",
     "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 20,
+    "body_excerpt": "## Summary - Remove the unused `PILImageResampling` runtime import from `video_processing_utils.py` which causes an `ImportError` when Pillow is not installed - Also remove the now-unused `is_vision_available` import that only guarded the\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44975",
-    "created_at": "2026-03-24T17:12:31Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45227",
+    "created_at": "2026-04-03T17:44:25Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44975/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44975",
+    "files_url": "https://github.com/huggingface/transformers/pull/45227/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45227",
     "labels": [],
     "merged": false,
-    "number": 44975,
+    "number": 45227,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: rebase main; clean config reads, ImageProcessor backend, misc cleanup",
-    "updated_at": "2026-03-24T17:13:42Z"
+    "title": "fix: remove nonexistent PILImageResampling import from video_processing_utils",
+    "updated_at": "2026-04-05T17:04:49Z"
   },
   {
-    "additions": 799,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 6,
+    "additions": 3,
+    "author": "akhilc08",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes an `IndexError: string index out of range` crash in `_split_tokens_on_unicode()` when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD) - Adds a bounds check so that when `unicode_offse\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44974",
-    "created_at": "2026-03-24T16:13:25Z",
-    "deletions": 82,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45226",
+    "created_at": "2026-04-03T17:43:21Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44974/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44974",
+    "files_url": "https://github.com/huggingface/transformers/pull/45226/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45226",
     "labels": [],
     "merged": false,
-    "number": 44974,
+    "number": 45226,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor core_model_loading to support FSDP shard-on-read loading",
-    "updated_at": "2026-03-25T17:00:57Z"
+    "state": "closed",
+    "title": "fix: handle trailing replacement character in Whisper word timestamp decoding",
+    "updated_at": "2026-04-05T17:04:49Z"
   },
   {
-    "additions": 22,
-    "author": "andylizf",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds `.item()` to `max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()` in all vision attention modules that pass this value to `flash_attn_varlen_func`. ### Context On **released versions** (e.g. 4.52.4), using\u2026",
-    "changed_files": 19,
+    "additions": 6,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The `dev` extra now indirectly pulls hf-doc-builder so the install step failed. We also need to update to current main for the latest features",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44973",
-    "created_at": "2026-03-24T15:42:32Z",
-    "deletions": 22,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45225",
+    "created_at": "2026-04-03T16:46:54Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44973/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44973",
+    "files_url": "https://github.com/huggingface/transformers/pull/45225/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45225",
     "labels": [],
-    "merged": false,
-    "number": 44973,
+    "merged": true,
+    "number": 45225,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix max_seqlen type in vision attention for torch.compile + FA2",
-    "updated_at": "2026-03-25T14:12:50Z"
+    "state": "closed",
+    "title": "fix: hf-doc-builder insallation was failing",
+    "updated_at": "2026-04-03T17:02:09Z"
   },
   {
-    "additions": 17,
-    "author": "Abdennacer-Badaoui",
+    "additions": 2,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title. Updating Gemma3/Gemma3n expectations.",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Fix for tiny model",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44972",
-    "created_at": "2026-03-24T15:11:50Z",
-    "deletions": 12,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45224",
+    "created_at": "2026-04-03T16:39:23Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44972/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44972",
+    "files_url": "https://github.com/huggingface/transformers/pull/45224/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45224",
     "labels": [],
     "merged": true,
-    "number": 44972,
-    "review_comments_count": 10,
+    "number": 45224,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[AMD CI] Gemma3/Gemma3n Expectations",
-    "updated_at": "2026-03-24T16:30:03Z"
+    "title": "remove unnecessary entries in some auto model mappings",
+    "updated_at": "2026-04-03T17:26:24Z"
   },
   {
-    "additions": 0,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Removed the tokenizer_class attr was never there to begin with, and kwargs are now supported. This was failing some test on vllm ci. Fixes https://buildkite.com/vllm/ci/builds/57601/steps/canvas?sid=019d1aec-aa5a-41\u2026",
-    "changed_files": 4,
+    "additions": 17,
+    "author": "Talhax55z",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #31356 ## What does this PR do? The `postprocess` method in `ObjectDetectionPipeline` was hardcoding `raw_annotations[0]`, which caused batch inference to only return results for the first image, ignoring all others. This PR replaces\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44971",
-    "created_at": "2026-03-24T14:59:36Z",
-    "deletions": 11,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45223",
+    "created_at": "2026-04-03T16:36:25Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44971/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44971",
+    "files_url": "https://github.com/huggingface/transformers/pull/45223/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45223",
     "labels": [],
-    "merged": true,
-    "number": 44971,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "[ `vllm x v5`] nit",
-    "updated_at": "2026-03-24T17:40:05Z"
+    "merged": false,
+    "number": 45223,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix: ObjectDetectionPipeline batch inference only returns first image results",
+    "updated_at": "2026-04-06T10:31:26Z"
   },
   {
-    "additions": 20,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "additions": 12,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary When using Gemma 3 or Gemma 4 for text-only supervised fine-tuning (no images), the forward pass raises a `ValueError` because `token_type_ids` / `mm_token_type_ids` is not provided. This happens because `AutoTokenizer` does not\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44970",
-    "created_at": "2026-03-24T13:49:21Z",
-    "deletions": 76,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45222",
+    "created_at": "2026-04-03T16:27:31Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44970/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44970",
+    "files_url": "https://github.com/huggingface/transformers/pull/45222/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45222",
     "labels": [],
-    "merged": true,
-    "number": 44970,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix CPU 16 bytes alignment issue using equivalent fallback",
-    "updated_at": "2026-03-25T09:01:03Z"
+    "merged": false,
+    "number": 45222,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix(gemma3, gemma4): default token_type_ids to zeros for text-only training",
+    "updated_at": "2026-04-03T16:28:41Z"
   },
   {
-    "additions": 4,
-    "author": "tarekziade",
+    "additions": 6,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Extends the CI so we can use Make and read toml files",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per title",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44968",
-    "created_at": "2026-03-24T11:43:24Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45221",
+    "created_at": "2026-04-03T15:49:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44968/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44968",
+    "files_url": "https://github.com/huggingface/transformers/pull/45221/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45221",
     "labels": [],
     "merged": false,
-    "number": 44968,
+    "number": 45221,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Added Make to the docker and `tomli` to `.[quality]`",
-    "updated_at": "2026-03-24T15:06:29Z"
+    "state": "open",
+    "title": "user friendly error when loading audio from video",
+    "updated_at": "2026-04-03T20:08:44Z"
   },
   {
-    "additions": 87,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix: FA kernel launches currently are not thread-safe (nogil) in multi-gpu env. This simple patch fixes the issue. ```py # Set the correct CUDA context before launching the FlashAttention kernel. with torch.cuda.dev\u2026",
-    "changed_files": 2,
+    "additions": 346,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds transformers serve compatibility to multimodal models like qwen omni or gemma 4. We add support for audio with chat completion and response though `input_audio` -> the client need to base64-encode the a\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44967",
-    "created_at": "2026-03-24T11:33:45Z",
-    "deletions": 84,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45220",
+    "created_at": "2026-04-03T14:16:33Z",
+    "deletions": 37,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44967/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44967",
+    "files_url": "https://github.com/huggingface/transformers/pull/45220/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45220",
     "labels": [],
     "merged": false,
-    "number": 44967,
-    "review_comments_count": 0,
+    "number": 45220,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "Fix FA kernel launch needs correct cuda device ctx in multi-gpu env",
-    "updated_at": "2026-03-26T10:09:41Z"
+    "title": "Multimodal serve support ",
+    "updated_at": "2026-04-03T19:57:08Z"
   },
   {
-    "additions": 8,
-    "author": "pramilajangid",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44964 ## Summary This PR restores backward compatibility for `CommonKwargs` in `transformers.processing_utils`, which is still referenced by some remote processor implementations. ## Problem After the typed-dict cleanup (commit `533\u2026",
+    "additions": 3,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? google/gemma-4-26B-A4B-it tp 2, memory is 46G per rank wo the change, drop to about 25G w per rank with the change - text models: @ArthurZucker @Cyrilvallez",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44966",
-    "created_at": "2026-03-24T11:06:57Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45219",
+    "created_at": "2026-04-03T13:59:02Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44966/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44966",
+    "files_url": "https://github.com/huggingface/transformers/pull/45219/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45219",
     "labels": [],
     "merged": false,
-    "number": 44966,
+    "number": 45219,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix backward compatibility for CommonKwargs in processing_utils (brea\u2026",
-    "updated_at": "2026-03-24T12:48:44Z"
+    "state": "open",
+    "title": "reduce memory for gemma4 moe model in tp",
+    "updated_at": "2026-04-03T14:29:30Z"
   },
   {
-    "additions": 37,
-    "author": "ydshieh",
+    "additions": 4057,
+    "author": "LysandreJik",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "body_excerpt": "This PR offers a new, Agentic surface for transformers. It tries to apply what is done elsewhere with CLIs to `transformers`, leveraging many current use-cases of `transformers` and exposing them as CLI endpoints. I recommend reading this\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44965",
-    "created_at": "2026-03-24T10:59:31Z",
-    "deletions": 32,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44965/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44965",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45218",
+    "created_at": "2026-04-03T13:31:08Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45218/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45218",
     "labels": [],
     "merged": false,
-    "number": 44965,
+    "number": 45218,
     "review_comments_count": 0,
     "state": "open",
-    "title": "try",
-    "updated_at": "2026-03-24T11:19:27Z"
+    "title": "Proposal: Agent-first CLI",
+    "updated_at": "2026-04-06T12:42:44Z"
   },
   {
-    "additions": 3,
-    "author": "josh-kean",
+    "additions": 5,
+    "author": "ENg-122",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an import in src/transformers/video_processing_utils.py that was causing the main build to fail Fixes # 44933 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Remove unnecessary masked_fill_(mask, 0) call in torch_chunk_gated_delta_rule. The decay_mask computed earlier already encodes the causal/lower-triangular structure (upper-triangle values are zero), so masking the a\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44958",
-    "created_at": "2026-03-23T20:07:09Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45215",
+    "created_at": "2026-04-03T09:08:28Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44958/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44958",
-    "labels": [],
-    "merged": false,
-    "number": 44958,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "fixed import error with PILImageResampling",
-    "updated_at": "2026-03-24T13:53:00Z"
-  },
-  {
-    "additions": 1473,
-    "author": "bigshanedogg",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "> **Draft PR \u2014 waiting for issue approval.** This PR is opened alongside the issue request. > It will be marked ready for review after a maintainer gives the go-ahead on the issue. # What does this PR do? Adds native Transformers support f\u2026",
-    "changed_files": 12,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44956",
-    "created_at": "2026-03-23T19:34:30Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44956/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44956",
+    "files_url": "https://github.com/huggingface/transformers/pull/45215/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45215",
     "labels": [],
     "merged": false,
-    "number": 44956,
+    "number": 45215,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[WIP] Add HyperCLOVAX model",
-    "updated_at": "2026-03-23T19:38:26Z"
+    "title": "[Qwen3_5]Remove unnecessary masked_fill_ in torch_chunk_gated_delta_rule attention computation: \"attn = (q_i @ k_i.transpose(-1, -2) * decay_mask[:, :, i]).masked_fill_(mask, 0)\"",
+    "updated_at": "2026-04-03T10:01:51Z"
   },
   {
-    "additions": 0,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "removes outdated qa pipeline reference",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 45,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes failed test case: `tests/models/cohere_asr/test_modeling_cohere_asr.py::CohereAsrModelTest::test_model_parallel_beam_search`, and add some adjustment to make the test cases pass for Intel XPU device. @ydshieh pls help review,\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44954",
-    "created_at": "2026-03-23T17:20:37Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45214",
+    "created_at": "2026-04-03T08:32:34Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44954/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44954",
+    "files_url": "https://github.com/huggingface/transformers/pull/45214/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45214",
     "labels": [],
     "merged": false,
-    "number": 44954,
+    "number": 45214,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[docs] pipeline cleanup",
-    "updated_at": "2026-03-23T17:30:10Z"
+    "title": "cohere_asr: fix bug for model_parallel_beam_search test case",
+    "updated_at": "2026-04-03T08:33:38Z"
   },
   {
-    "additions": 861,
-    "author": "zucchini-nlp",
+    "additions": 6052,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Decouples `kwargs` manipulation from hub's strict decorator, and ensures that all subclasses of a `PreTrainedConfig` accept any kwargs which is what we supported prev. Not all remote code has `@strict` or has an `__\u2026",
-    "changed_files": 536,
+    "body_excerpt": "comparison https://github.com/huggingface/transformers/blob/937d61b9fa00001da1a0680ecf8061b5990fbcd7/sarvam_moe_comparison.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once mer\u2026",
+    "changed_files": 26,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44953",
-    "created_at": "2026-03-23T17:13:39Z",
-    "deletions": 824,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44953/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44953",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45213",
+    "created_at": "2026-04-03T08:25:49Z",
+    "deletions": 209,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45213/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45213",
     "labels": [],
-    "merged": true,
-    "number": 44953,
+    "merged": false,
+    "number": 45213,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Config kwargs",
-    "updated_at": "2026-03-24T14:14:46Z"
+    "state": "open",
+    "title": "DO NOT MERGE - model creation skill",
+    "updated_at": "2026-04-03T12:26:09Z"
   },
   {
-    "additions": 10,
-    "author": "Jess-Co-Del",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes the non existence of output dictionary change, when parameter output_hidden_states=True is passed to models like CLIP or SigLip. This is especially pertinent for the vision model config. According to #42759 no\u2026",
+    "additions": 23,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44952",
-    "created_at": "2026-03-23T17:02:50Z",
-    "deletions": 2,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45212",
+    "created_at": "2026-04-03T07:44:35Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44952/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44952",
+    "files_url": "https://github.com/huggingface/transformers/pull/45212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45212",
     "labels": [],
     "merged": false,
-    "number": 44952,
+    "number": 45212,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: Add correct return behaviour when output_hidden_states=True for CLIP and SIGLIP vision models",
-    "updated_at": "2026-03-24T11:19:35Z"
+    "title": "musicflamingo: add test support for Intel XPU device",
+    "updated_at": "2026-04-03T07:45:42Z"
   },
   {
-    "additions": 113,
-    "author": "hemantmm",
+    "additions": 4,
+    "author": "matdou",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This pull request adds routing replay functionality for mixture-of-experts (MoE) model types by giving users the option to override router probabilities while processing a forward pass through their models. <!-- Con\u2026",
+    "body_excerpt": "Fixes #45208 # What does this PR do? This PR corrects an incorrect return type in `Qwen3MoeSparseMoeBlock.forward`. The method was annotated as returning `tuple[torch.Tensor, torch.Tensor]`, while the implementation returns a `torch.Tensor\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44951",
-    "created_at": "2026-03-23T16:29:46Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45211",
+    "created_at": "2026-04-03T07:44:32Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44951/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44951",
+    "files_url": "https://github.com/huggingface/transformers/pull/45211/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45211",
     "labels": [],
     "merged": false,
-    "number": 44951,
+    "number": 45211,
     "review_comments_count": 0,
     "state": "open",
-    "title": "feat: Add router_logits override to enable Routing Replay for MoE models",
-    "updated_at": "2026-03-25T13:06:31Z"
+    "title": "[Qwen3MoE] Fix wrong return type annotation in Qwen3MoeSparseMoeBlock.forward",
+    "updated_at": "2026-04-03T08:16:32Z"
   },
   {
-    "additions": 1287,
-    "author": "Cyrilvallez",
+    "additions": 11,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This PR finally makes mamba layer caches first class citizen, and adds native support for them. It supports the following layers combinations: - all mamba layers - alternating attention layer/mamba\u2026",
-    "changed_files": 63,
+    "body_excerpt": "# What does this PR do? Commits that got in the release branch to allow pushing",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44950",
-    "created_at": "2026-03-23T16:25:13Z",
-    "deletions": 4119,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45210",
+    "created_at": "2026-04-03T05:42:21Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44950/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44950",
+    "files_url": "https://github.com/huggingface/transformers/pull/45210/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45210",
     "labels": [],
-    "merged": false,
-    "number": 44950,
+    "merged": true,
+    "number": 45210,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Cache] Native mamba & hybrid cache",
-    "updated_at": "2026-03-25T17:26:15Z"
+    "state": "closed",
+    "title": "Fix pypi release",
+    "updated_at": "2026-04-03T06:40:39Z"
   },
   {
-    "additions": 80,
-    "author": "Charly21r",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44936 This PR fixes an issue with `NotebookProgressCallback` in the `Trainer` where calling evaluate() before or after training would crash due to the training tracker being `None`. The callback now properly\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44949",
-    "created_at": "2026-03-23T16:07:50Z",
-    "deletions": 1,
+    "additions": 3,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45209",
+    "created_at": "2026-04-03T05:40:36Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44949/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44949",
+    "files_url": "https://github.com/huggingface/transformers/pull/45209/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45209",
     "labels": [],
     "merged": false,
-    "number": 44949,
+    "number": 45209,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix: NotebookProgressCallback crash when evaluating with the Trainer",
-    "updated_at": "2026-03-26T07:25:54Z"
+    "title": "nomic_bert: make the test suitable for general device.",
+    "updated_at": "2026-04-03T05:41:42Z"
   },
   {
-    "additions": 1,
-    "author": "heycorgi",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 39,
+    "author": "w4nderlust",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #45206 ## What does this PR do? Adds documentation for the Gemma4 Per-Layer Embeddings (PLE) system, which is currently pretty hard to reverse-engineer from the code alone. I ran into this while implementing Gemma4 inference from scr\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44948",
-    "created_at": "2026-03-23T15:33:56Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45207",
+    "created_at": "2026-04-03T05:15:47Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44948/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44948",
+    "files_url": "https://github.com/huggingface/transformers/pull/45207/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45207",
     "labels": [],
     "merged": false,
-    "number": 44948,
+    "number": 45207,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Create aa.py",
-    "updated_at": "2026-03-23T15:34:35Z"
+    "state": "open",
+    "title": "[Gemma4] Add docstrings for Per-Layer Embeddings (PLE) pipeline",
+    "updated_at": "2026-04-03T05:16:54Z"
   },
   {
-    "additions": 117,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The doc was generated by Claude. I deleted unnecessary repetitions and fixed a few moments to be more precise. We don't really need to merge it now so if you think the text is too LLM, feel free to take this as an i\u2026",
+    "additions": 108,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh pls help review, thx!",
+    "changed_files": 3,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45204",
+    "created_at": "2026-04-03T02:32:39Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45204/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45204",
+    "labels": [],
+    "merged": false,
+    "number": 45204,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix bug for videomt model device mismatch",
+    "updated_at": "2026-04-03T02:33:47Z"
+  },
+  {
+    "additions": 68,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Disable FlashAttention support for Gemm4 which FA cannot suport due to global.head-dim=512. I am very confused at the current code/test for Gemma4. I ran real inference using transformer `main` and `fa` throws head-\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44947",
-    "created_at": "2026-03-23T13:23:04Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45202",
+    "created_at": "2026-04-02T23:37:22Z",
+    "deletions": 47,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45202/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45202",
+    "labels": [],
+    "merged": false,
+    "number": 45202,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Fix gemma4 has flash-attention incompatbile head-dim=512",
+    "updated_at": "2026-04-07T02:24:44Z"
+  },
+  {
+    "additions": 3,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following **Wav2Vec2PhonemeCTC** use cases were identified and fixed in this PR: \u2192 [05c0e1d (\"rm slow tokenizers\")](https://github.com/huggingface/transformers/pull/40936) added [self.backend = kwargs.pop(\"bac\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45199",
+    "created_at": "2026-04-02T20:03:22Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44947/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44947",
+    "files_url": "https://github.com/huggingface/transformers/pull/45199/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45199",
     "labels": [],
     "merged": false,
-    "number": 44947,
-    "review_comments_count": 16,
+    "number": 45199,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Add doc page for capturing outputs",
-    "updated_at": "2026-03-25T19:48:57Z"
+    "title": "fix(models): Resolve regressions in Wav2Vec2PhonemeCTCTokenizer (wav2vec2-lv-60-espeak-cv-ft)",
+    "updated_at": "2026-04-03T04:07:51Z"
   },
   {
-    "additions": 14,
-    "author": "BSchilperoort",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 13,
+    "additions": 104,
+    "author": "douglas-reid",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes issues with the Gemma 4 model docs. Mainly, this is updating examples to point at the actual models, with FC and Audio examples added. - [ X ] I confirm that this is not a pure code agent PR. ## Before submitting - [ X ] This PR fixe\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44946",
-    "created_at": "2026-03-23T12:18:34Z",
-    "deletions": 14,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45197",
+    "created_at": "2026-04-02T19:05:35Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44946/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44946",
+    "files_url": "https://github.com/huggingface/transformers/pull/45197/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45197",
     "labels": [],
     "merged": true,
-    "number": 44946,
+    "number": 45197,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Correct docstrings for `from_pretrained` (url input deprecated)",
-    "updated_at": "2026-03-23T13:05:16Z"
+    "title": "fix(docs): correct gemma4 docs and examples",
+    "updated_at": "2026-04-02T22:23:16Z"
   },
   {
-    "additions": 71,
-    "author": "zucchini-nlp",
+    "additions": 1,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? @hmellor",
-    "changed_files": 5,
+    "body_excerpt": "fixes `<hfoptions>` tag",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44943",
-    "created_at": "2026-03-23T10:58:40Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45196",
+    "created_at": "2026-04-02T18:02:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44943/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44943",
+    "files_url": "https://github.com/huggingface/transformers/pull/45196/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45196",
     "labels": [],
     "merged": true,
-    "number": 44943,
-    "review_comments_count": 1,
+    "number": 45196,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Clearer type hints and fix rope validation in configs",
-    "updated_at": "2026-03-23T13:32:11Z"
+    "title": "[docs] formatting",
+    "updated_at": "2026-04-03T09:37:52Z"
   },
   {
-    "additions": 220,
-    "author": "hmellor",
+    "additions": 90,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 3,
+    "body_excerpt": "# What this PR does Adds a new `load_image_as_tensor` utility leveraging torchvision's `decode_image` to `image_utils.py` and overrides `fetch_images` in `TorchvisionBackend` to use it. Previously, all image loading went through PIL regard\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44942",
-    "created_at": "2026-03-23T10:46:23Z",
-    "deletions": 5,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44942/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44942",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45195",
+    "created_at": "2026-04-02T17:58:51Z",
+    "deletions": 20,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45195/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45195",
     "labels": [],
     "merged": false,
-    "number": 44942,
-    "review_comments_count": 0,
+    "number": 45195,
+    "review_comments_count": 4,
     "state": "open",
-    "title": "Add inference time layer fusion optimisations via `PreTrainedModel.from_pretrained(fuse_layers=True)`",
-    "updated_at": "2026-03-25T16:14:19Z"
+    "title": " Use torchvision `decode_image` to load images in the torchvision backend",
+    "updated_at": "2026-04-06T20:41:41Z"
   },
   {
-    "additions": 4,
-    "author": "ydshieh",
+    "additions": 222,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the failing job after #43514 (the fix is effefctive, see [here](https://github.com/huggingface/transformers/actions/runs/23433395911/job/68165255513?pr=44941)) [Update Transformers metadata](https://github.com/h\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes: - Replace `PretrainedConfig` with `PreTrainedConfig` - Don't import from other models inside `configuration_model.py`, instead resolve via modular - Text/vision sub-configs are documented (only kosmos was mis\u2026",
+    "changed_files": 23,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44941",
-    "created_at": "2026-03-23T10:42:09Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45194",
+    "created_at": "2026-04-02T16:39:06Z",
+    "deletions": 40,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44941/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44941",
+    "files_url": "https://github.com/huggingface/transformers/pull/45194/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45194",
     "labels": [],
-    "merged": true,
-    "number": 44941,
+    "merged": false,
+    "number": 45194,
     "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix failing job `Update Transformers metadata` after #43514",
-    "updated_at": "2026-03-23T13:41:39Z"
+    "state": "open",
+    "title": "Configuration insoncistencies",
+    "updated_at": "2026-04-02T16:59:48Z"
   },
   {
-    "additions": 138,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Model loading of same model path but 2 different threads (2 different instances) have meta device tensor issues: unloaded meta/empty embedding/lm-head when it should not be empty post model load. Cause: `tie_weight(\u2026",
-    "changed_files": 3,
+    "additions": 15,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45070 Though I am convinced that configs should not be pyndatic BaseClass, just because we already wrap all subclasses as dataclass. You never know what happe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44940",
-    "created_at": "2026-03-23T09:55:57Z",
-    "deletions": 10,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45193",
+    "created_at": "2026-04-02T15:36:56Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44940/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44940",
+    "files_url": "https://github.com/huggingface/transformers/pull/45193/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45193",
     "labels": [],
     "merged": false,
-    "number": 44940,
-    "review_comments_count": 3,
+    "number": 45193,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix tie_weights skipping logic is not tied to model thread scope",
-    "updated_at": "2026-03-24T15:24:41Z"
+    "title": "Config can apply pyndatic validation without torch-dependence",
+    "updated_at": "2026-04-02T17:00:43Z"
   },
   {
-    "additions": 2038,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Refactored and cleaned up model linter - separated package - one rule per module - refactored legacy checks into their own rules - simplified pattern, duplication removal",
-    "changed_files": 25,
+    "additions": 9896,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "--------- # What does this PR do? model previously unable to use tools ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenec\u2026",
+    "changed_files": 41,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44939",
-    "created_at": "2026-03-23T08:45:36Z",
-    "deletions": 1446,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45192",
+    "created_at": "2026-04-02T14:35:18Z",
+    "deletions": 79,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44939/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44939",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45192/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45192",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44939,
-    "review_comments_count": 5,
+    "number": 45192,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "refactor: mlinter as its own package",
-    "updated_at": "2026-03-24T07:56:15Z"
+    "title": "casually dropping the most capable open weights on the planet",
+    "updated_at": "2026-04-03T12:28:06Z"
   },
   {
-    "additions": 2,
-    "author": "VanshikaSohal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes two small but impactful bugs in the BART documentation: 1. **Variable shadowing bug**: In the Pipeline example, the variable was named `pipeline` which shadows the imported `pipeline` function. Renamed to `fi\u2026",
+    "additions": 13,
+    "author": "saslifat-gif",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The Qwen2 tokenizer test file had no custom test methods \u2014 only integration constants inherited from TokenizerTesterMixin. This PR adds a test documenting two untested edge cases in decode(): **Before (no test, behavior undocumented):** ``\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44935",
-    "created_at": "2026-03-22T18:45:01Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45191",
+    "created_at": "2026-04-02T14:08:15Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44935/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44935",
+    "files_url": "https://github.com/huggingface/transformers/pull/45191/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45191",
     "labels": [],
-    "merged": true,
-    "number": 44935,
+    "merged": false,
+    "number": 45191,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix variable shadowing in pipeline example and typo in BART docs (BERT \u2192 BART)",
-    "updated_at": "2026-03-23T14:28:04Z"
+    "state": "open",
+    "title": "Add edge case tests for out-of-range token id decoding in Qwen2 tokenizer",
+    "updated_at": "2026-04-02T14:54:31Z"
   },
   {
-    "additions": 9,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [T5ModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524758706#step:14:1449) & this [Qwen2IntegrationTest](https://github.com/huggingface/transformer\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
+    "additions": 92,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch extends ty check to src/transformers/cli. Supersedes https://github.com/huggingface/transformers/pull/44566. I've added some of the changes in this PR",
+    "changed_files": 10,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44934",
-    "created_at": "2026-03-22T18:03:34Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45190",
+    "created_at": "2026-04-02T13:54:23Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44934/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44934",
+    "files_url": "https://github.com/huggingface/transformers/pull/45190/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45190",
     "labels": [],
-    "merged": true,
-    "number": 44934,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix failing `T5ModelIntegrationTest`",
-    "updated_at": "2026-03-24T14:50:10Z"
+    "merged": false,
+    "number": 45190,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Fix ty for transformers cli",
+    "updated_at": "2026-04-03T09:34:56Z"
   },
   {
-    "additions": 1,
-    "author": "r266-tech",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44908 The `get_inverse_sqrt_schedule` function accepts `timescale` and `last_epoch` parameters, but `get_scheduler` was not forwarding `scheduler_specific_kwargs` to it. This caused user-provided kwargs like\u2026",
-    "changed_files": 1,
+    "additions": 409,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Add two optional, backward-compatible inputs (`test_path_prefix`, `test_file_suffix`) to `model_jobs.yml` and `self-scheduled.yml` \u2014 defaults preserve all existing behavior - Extend the `set-matrix` step in `self-scheduled.yml\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44932",
-    "created_at": "2026-03-22T17:30:56Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45189",
+    "created_at": "2026-04-02T13:43:29Z",
+    "deletions": 185,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44932/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44932",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45189/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45189",
+    "labels": [],
     "merged": false,
-    "number": 44932,
+    "number": 45189,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Pass scheduler_specific_kwargs to inverse_sqrt scheduler",
-    "updated_at": "2026-03-23T12:44:16Z"
+    "state": "open",
+    "title": "Add doc test CI workflow reusing existing model job infrastructure",
+    "updated_at": "2026-04-03T07:30:17Z"
   },
   {
-    "additions": 1,
-    "author": "r266-tech",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a v5 regression where `CamembertForMaskedLM` (and all CamemBERT masked-LM tasks) produces near-zero, near-uniform logits, making the model completely non-functional. ### Root cause In v5, `modeling_utils.get_\u2026",
+    "additions": 5,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do ? This PR fixes the `test_register_result_handler`. Not sure how it passed in the past when i added it but since CB returns `generated_tokens` from the same list to avoid copy, len(results[i].generated_tokens) for i\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44931",
-    "created_at": "2026-03-22T17:28:57Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45188",
+    "created_at": "2026-04-02T13:15:57Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44931/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44931",
+    "files_url": "https://github.com/huggingface/transformers/pull/45188/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45188",
     "labels": [],
     "merged": true,
-    "number": 44931,
+    "number": 45188,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(camembert): add tie_word_embeddings=True to CamembertConfig",
-    "updated_at": "2026-03-25T07:09:37Z"
+    "title": "fix `test_register_result_handler`",
+    "updated_at": "2026-04-03T09:37:31Z"
   },
   {
-    "additions": 103,
-    "author": "javierdejesusda",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Fixes #44912 \u2014 MXFP4 quantization error messages combine `is_triton_available()` and `is_kernels_available()` into a single `kernels_available` boolean, making it impossible to identify which dependency is missing - Split the\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Close file handler.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44930",
-    "created_at": "2026-03-22T17:27:20Z",
-    "deletions": 13,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45187",
+    "created_at": "2026-04-02T13:10:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44930/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44930",
+    "files_url": "https://github.com/huggingface/transformers/pull/45187/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45187",
     "labels": [],
-    "merged": true,
-    "number": 44930,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "fix: split MXFP4 dependency checks for specific error messages",
-    "updated_at": "2026-03-24T15:33:14Z"
+    "merged": false,
+    "number": 45187,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Close file handler",
+    "updated_at": "2026-04-03T10:01:34Z"
   },
   {
-    "additions": 26,
-    "author": "ydshieh",
+    "additions": 7248,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "## Problem In `TokenizersBackend.convert_to_native_format()`, when a tokenizer has a custom `__init__` (the `elif` branch), `tokenizer.json` was parsed **twice**: 1. `TokenizerFast.from_file(fast_tokenizer_file)` \u2014 full Rust parse includin\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Same as https://github.com/huggingface/transformers/pull/40962 but cleans up the code to match transformers API. Couldn't test due to errors, the integration test is failing atm. Still need to clean the testing file\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44927",
-    "created_at": "2026-03-22T15:33:23Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45186",
+    "created_at": "2026-04-02T12:29:46Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44927/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44927",
+    "files_url": "https://github.com/huggingface/transformers/pull/45186/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45186",
     "labels": [],
-    "merged": true,
-    "number": 44927,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "fix: improve processor loading performance by avoiding redundant tokenizer parsing",
-    "updated_at": "2026-03-23T11:03:52Z"
+    "merged": false,
+    "number": 45186,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add new model: Isaac ",
+    "updated_at": "2026-04-02T12:55:56Z"
   },
   {
-    "additions": 25,
-    "author": "yonigozlan",
+    "additions": 57,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Solve import errors when trying to import `from transformers.models.llama4.image_processing_llama4_fast import Llama4ImageProcessorFast` for example",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? If we have videos, the token type ids will be `2` but the current fn checks only image token types. This PR generalizes it rely only on `vision_group_ids` instead of token types",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44926",
-    "created_at": "2026-03-22T14:46:17Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45185",
+    "created_at": "2026-04-02T11:35:53Z",
+    "deletions": 104,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44926/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44926",
+    "files_url": "https://github.com/huggingface/transformers/pull/45185/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45185",
     "labels": [],
     "merged": true,
-    "number": 44926,
-    "review_comments_count": 1,
+    "number": 45185,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix backward compatibility for full path imports of Fast Image Processors",
-    "updated_at": "2026-03-23T14:16:49Z"
+    "title": "Generalize gemma vision mask to videos",
+    "updated_at": "2026-04-02T13:15:46Z"
   },
   {
-    "additions": 482,
-    "author": "kashif",
+    "additions": 425,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds a first-class MoE routing capture/replay API for Qwen2Moe and introduces shared MoE routing helpers for reuse by other MoE model families. It adds: - a structured `MoERouting` payload in modeling output\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# Summary This PR adds CPU offloading to continuous batching. It's in raft until perf and test status are reported. When the GPU KV cache is full and a request must be evicted, we check if there is enough VRAM to copy the request's KV cach\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44925",
-    "created_at": "2026-03-22T14:04:40Z",
-    "deletions": 24,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44925/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44925",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45184",
+    "created_at": "2026-04-02T10:12:00Z",
+    "deletions": 52,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45184/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45184",
     "labels": [],
     "merged": false,
-    "number": 44925,
+    "number": 45184,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[MOE]  MoE routing capture and replay support",
-    "updated_at": "2026-03-24T12:49:30Z"
+    "title": "[CB] [Major] Add CPU request offloading",
+    "updated_at": "2026-04-03T13:40:52Z"
   },
   {
-    "additions": 9,
-    "author": "Qubitium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix two `nogil` threading bugs (reproduced on 3.14) : 1. Continus Batching crashes with torch graph errors with 2 threads on 2 separate model instances (same model path, but two distinct instances). Cause is missing\u2026",
-    "changed_files": 1,
+    "additions": 232,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The `transformers` CLI currently does a lot of work before command dispatch. In particular, the top-level entrypoint eagerly imports `transformers` and CLI subcommands with heavy dependencies, so even simple invocat\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44924",
-    "created_at": "2026-03-22T11:46:49Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45181",
+    "created_at": "2026-04-02T08:03:40Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45181/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45181",
+    "labels": [],
+    "merged": false,
+    "number": 45181,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Make the cli a top-level package",
+    "updated_at": "2026-04-02T14:00:58Z"
+  },
+  {
+    "additions": 48,
+    "author": "paulinebm",
+    "author_association": "MEMBER",
+    "body_excerpt": "## \ud83d\udd12 Pin GitHub Actions to commit SHAs This PR pins all GitHub Actions to their exact commit SHA instead of mutable tags or branch names. **Why?** Pinning to a SHA prevents supply chain attacks where a tag (e.g. `v4`) could be moved to poi\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45180",
+    "created_at": "2026-04-02T08:00:02Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44924/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44924",
+    "files_url": "https://github.com/huggingface/transformers/pull/45180/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45180",
     "labels": [],
     "merged": true,
-    "number": 44924,
-    "review_comments_count": 1,
+    "number": 45180,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Continuous batching thread safety",
-    "updated_at": "2026-03-24T05:42:56Z"
+    "title": "\ud83d\udd12 Pin GitHub Actions to commit SHAs",
+    "updated_at": "2026-04-02T09:12:55Z"
   },
   {
-    "additions": 3,
-    "author": "prakhar-agarwal",
+    "additions": 327,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary This PR ads minor changes to `cache.update`, updates the memory handler with all new features and refactors a few parts of the code to make it more readable. Cache indexing: - Replace fancy indexing (cache[idx, :, :]) with expli\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45179",
+    "created_at": "2026-04-02T06:15:08Z",
+    "deletions": 288,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45179/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45179",
+    "labels": [],
+    "merged": true,
+    "number": 45179,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "[CB] Tweaks to update and minor fixes",
+    "updated_at": "2026-04-03T09:36:51Z"
+  },
+  {
+    "additions": 4966,
+    "author": "masoudpz",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Addresses issue #44843. Verified with isolated repro logic. Changes made: Updated the logic to properly identify local and offline scenarios upfront. Now, is_local is correctly set to True if: 1. is_offline_mode() is active. 2. The local_f\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44923",
-    "created_at": "2026-03-22T05:20:22Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44923/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44923",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45176",
+    "created_at": "2026-04-02T00:47:45Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45176/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45176",
     "labels": [],
     "merged": false,
-    "number": 44923,
+    "number": 45176,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: avoid unconditional model_info call in _patch_mistral_regex",
-    "updated_at": "2026-03-22T05:24:11Z"
+    "title": "added efficietvitsam model to HF",
+    "updated_at": "2026-04-02T16:01:34Z"
   },
   {
-    "additions": 10,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44849. When `output_hidden_states=True` (or `output_attentions=True`) is passed to `model.generate()`, the `@capture_outputs` decorator reads the flag value but leaves it in `**kwargs`. These flags then prop\u2026",
-    "changed_files": 1,
+    "additions": 49,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates the docs with changes from #44796: - added section for `--compile` in the serve optimization docs - added section for `--model-timeout` in the Loading models section (useful when a model is silently kicked off and a user doesn't kn\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44922",
-    "created_at": "2026-03-22T01:21:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45174",
+    "created_at": "2026-04-01T23:29:39Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44922/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44922",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44922,
+    "files_url": "https://github.com/huggingface/transformers/pull/45174/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45174",
+    "labels": [],
+    "merged": true,
+    "number": 45174,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[docs] transformers serve",
+    "updated_at": "2026-04-02T16:39:12Z"
+  },
+  {
+    "additions": 16,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixed the `qwen3_5` / `qwen3_5_moe` reverse-loading tests by correcting the text model type used in the setup, and aligned the reverse-mapping behavior with gemma3n since they are all native multimodal. This also re\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45173",
+    "created_at": "2026-04-01T20:20:37Z",
+    "deletions": 90,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45173/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45173",
+    "labels": [],
+    "merged": true,
+    "number": 45173,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: pop output_* flags from kwargs in capture_outputs to prevent submodule leakage",
-    "updated_at": "2026-03-23T12:38:56Z"
+    "title": "[misc] fix qwen35 tests: correct the text model type and skip reverse_mapping",
+    "updated_at": "2026-04-02T13:05:39Z"
   },
   {
-    "additions": 4,
-    "author": "s-zx",
+    "additions": 156,
+    "author": "ezylopx5",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44918. `compute_3d_position_ids` in the Qwen2.5-VL / Qwen3-VL / Qwen3.5 model families destructures `inputs_embeds.shape` into exactly three variables: ```python batch_size, seq_length, _ = inputs_embeds.sha\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Problem Transformers currently provides sampling filters such as top-k, top-p, min-p, and top-h, but does not include top-n-sigma sampling from \"Top-n\u03c3: Not All Logits Are You Need\". This makes it harder to use a temperature-invariant t\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44921",
-    "created_at": "2026-03-22T00:39:01Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45172",
+    "created_at": "2026-04-01T19:25:12Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44921/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44921",
+    "files_url": "https://github.com/huggingface/transformers/pull/45172/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45172",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44921,
+    "number": 45172,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: use shape index access in compute_3d_position_ids for Qwen VL models",
-    "updated_at": "2026-03-23T10:00:51Z"
+    "title": "Add TopNSigmaLogitsWarper and top_n_sigma generation config support",
+    "updated_at": "2026-04-04T06:24:26Z"
   },
   {
-    "additions": 15,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `num_labels` not being propagated from `Qwen3_5Config` to its `text_config` when loading via `AutoConfig.from_pretrained(model, num_labels=N)`. **Root cause:** `Qwen3_5Config.__post_init__` initializes `text_\u2026",
+    "additions": 134,
+    "author": "Kash6",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "When input_boxes contains a mix of None and real box entries (e.g. input_boxes=[None, [[x1,y1,x2,y2]]]), the processor pads None entries with [-10,-10,0,0] but does not generate corresponding input_boxes_labels. The model's geometry encode\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44920",
-    "created_at": "2026-03-22T00:01:59Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45171",
+    "created_at": "2026-04-01T18:26:48Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44920/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44920",
+    "files_url": "https://github.com/huggingface/transformers/pull/45171/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45171",
     "labels": [],
     "merged": false,
-    "number": 44920,
+    "number": 45171,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: propagate num_labels/id2label to text_config in Qwen3_5Config",
-    "updated_at": "2026-03-23T12:06:04Z"
+    "state": "open",
+    "title": "Fix Sam3Processor missing input_boxes_labels for padded None entries",
+    "updated_at": "2026-04-03T23:38:13Z"
   },
   {
-    "additions": 18,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `Qwen2_5_VLProcessor.__call__` when processing batched inputs without padding (`padding=False`). **Root cause:** When the tokenizer returns sequences of different lengths (ragged list), `np.array(t\u2026",
-    "changed_files": 1,
+    "additions": 19,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Before I forget, opening a PR. Will cause conflicts in https://github.com/huggingface/transformers/pull/44431, so I will better merge this after refactoring",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44919",
-    "created_at": "2026-03-21T23:57:37Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45170",
+    "created_at": "2026-04-01T17:50:33Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44919/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44919",
+    "files_url": "https://github.com/huggingface/transformers/pull/45170/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45170",
     "labels": [],
     "merged": false,
-    "number": 44919,
+    "number": 45170,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle ragged batch inputs in Qwen2_5_VLProcessor mm_token_type_ids computation",
-    "updated_at": "2026-03-23T10:38:30Z"
+    "state": "open",
+    "title": "`layrnorm` -> `layernorm`",
+    "updated_at": "2026-04-01T18:03:31Z"
   },
   {
-    "additions": 5,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary `GPTNeoXConfig.convert_rope_params_to_dict` unconditionally overwrote `rope_parameters[\"partial_rotary_factor\"]` with the default `0.25` when `rotary_pct` was absent from kwargs. On every `from_pretrained` call, `rotary_pct` is\u2026",
-    "changed_files": 1,
+    "additions": 10,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "In https://github.com/huggingface/transformers/pull/45094 I introduced some errors to the remote code resolution when trying to detect if local code belonged to Transformers or not. These tests were: ```bash pytest tests/models/cohere_asr/\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44917",
-    "created_at": "2026-03-21T23:34:32Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45169",
+    "created_at": "2026-04-01T15:27:43Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44917/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44917",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44917,
+    "files_url": "https://github.com/huggingface/transformers/pull/45169/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45169",
+    "labels": [],
+    "merged": true,
+    "number": 45169,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(gpt-neox): preserve rotary_pct across save/load cycle",
-    "updated_at": "2026-03-23T12:37:48Z"
+    "title": "Fix explicit local code resolution for tokenizers and image processors",
+    "updated_at": "2026-04-01T21:48:02Z"
   },
   {
-    "additions": 8,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Importing `DebertaV2Model` (or anything that depends on it, e.g. `gliner`) raises `IndentationError` on Python 3.13 because `torch.jit.script` calls `inspect.getsource()`, dedents the snippet, and passes it to `ast.parse()`. Pyt\u2026",
-    "changed_files": 2,
+    "additions": 4,
+    "author": "w601sxs",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Based on our experimentation min and max lr for LLMs need to be set properly as defaults. Please refer to paper. For the broader community 1e-7 to 1e-4 are decent defaults # What does this PR do? <!-- Congratulations! You've made it this f\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44916",
-    "created_at": "2026-03-21T23:34:07Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45168",
+    "created_at": "2026-04-01T15:02:15Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44916/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44916",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45168/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45168",
+    "labels": [],
     "merged": false,
-    "number": 44916,
+    "number": 45168,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(deberta-v2): move \"Copied from\" comments above @torch.jit.script for Python 3.13 compat",
-    "updated_at": "2026-03-23T12:34:24Z"
+    "state": "open",
+    "title": "Update min_lr and max_lr default values to better defaults",
+    "updated_at": "2026-04-06T18:46:51Z"
   },
   {
-    "additions": 90,
-    "author": "maxsloef-goodfire",
+    "additions": 16,
+    "author": "xu-song",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? `clean_up_tokenization` applies English-specific string replacements (` .` \u2192 `.`, ` ?` \u2192 `?`, ` ,` \u2192 `,`, etc.) to decoded text. This was designed for BERT-era WordPiece tokenizers where decoding produced artifacts\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? This PR adds support for [Anthropic's JSON function style](https://platform.claude.com/docs/en/agents-and-tools/tool-use/define-tools): `{\"name\": \"...\", \"description\": \"...\", \"input_schema\": {...}}` ## Usage Example\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44915",
-    "created_at": "2026-03-21T20:45:03Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45167",
+    "created_at": "2026-04-01T14:50:33Z",
     "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44915/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44915",
+    "files_url": "https://github.com/huggingface/transformers/pull/45167/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45167",
     "labels": [],
     "merged": false,
-    "number": 44915,
-    "review_comments_count": 1,
+    "number": 45167,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix: skip `clean_up_tokenization` for BPE tokenizers in `PreTrainedTokenizerFast`",
-    "updated_at": "2026-03-23T18:45:52Z"
+    "title": "Add anthropic style of function schema",
+    "updated_at": "2026-04-03T05:49:56Z"
   },
   {
-    "additions": 1,
-    "author": "maxsloef-goodfire",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? The `Llama3Converter` in `convert_llama_weights_to_hf.py` hardcodes `clean_up_tokenization_spaces=True` (line 468). This causes `tokenizer.decode()` to silently strip spaces before punctuation for all converted Lla\u2026",
-    "changed_files": 1,
+    "additions": 67,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We didn't think we needed them, but I think we do after all!",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44914",
-    "created_at": "2026-03-21T20:25:51Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45166",
+    "created_at": "2026-04-01T14:46:33Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44914/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44914",
+    "files_url": "https://github.com/huggingface/transformers/pull/45166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45166",
     "labels": [],
     "merged": true,
-    "number": 44914,
+    "number": 45166,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: set `clean_up_tokenization_spaces=False` in Llama 3 tokenizer conversion",
-    "updated_at": "2026-03-23T08:38:18Z"
+    "title": "Re-add regex substitutions to the response parsing spec",
+    "updated_at": "2026-04-01T15:46:34Z"
   },
   {
-    "additions": 8,
-    "author": "ouroborosscr",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "Qwen3.5 uses 3D position_ids [3, batch, seq_len] for multi-dimensional rotary embedding. _is_packed_sequence() misinterprets this as a packed sequence, causing cu_seqlens to be constructed with 3x the actual token count. Flash attention th\u2026",
-    "changed_files": 1,
+    "additions": 368,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Emu3 was not updated in recent refactor and blip files were swapped. This PR fixes it Do we need anything to support BC importing from old files, or does it happen in `LazyImports` @yonigozlan ?",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44911",
-    "created_at": "2026-03-21T15:42:57Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45165",
+    "created_at": "2026-04-01T14:18:38Z",
+    "deletions": 543,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44911/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44911",
+    "files_url": "https://github.com/huggingface/transformers/pull/45165/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45165",
     "labels": [],
     "merged": false,
-    "number": 44911,
+    "number": 45165,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix flash attention crash with 3D position_ids (Qwen3.5)",
-    "updated_at": "2026-03-24T14:35:57Z"
+    "state": "open",
+    "title": "Fix missing image processors backends",
+    "updated_at": "2026-04-06T21:05:38Z"
   },
   {
     "additions": 1,
-    "author": "anshuS1310",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "The `get_scheduler` function was identifying the `inverse_sqrt` scheduler type but failing to pass `**scheduler_specific_kwargs` to the underlying `get_inverse_sqrt_schedule` function. This caused user-defined parameters like `timescale` t\u2026",
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix TypeError: 'NoneType' object is not iterable in `GenerationMixin.generate` - Fix for None layer_types <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is goin\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44909",
-    "created_at": "2026-03-21T09:59:07Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45164",
+    "created_at": "2026-04-01T13:53:41Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44909/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44909",
+    "files_url": "https://github.com/huggingface/transformers/pull/45164/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45164",
     "labels": [],
     "merged": true,
-    "number": 44909,
+    "number": 45164,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Update optimization.py",
-    "updated_at": "2026-03-24T13:06:15Z"
+    "title": "Fix TypeError: 'NoneType' object is not iterable in GenerationMixin.generate",
+    "updated_at": "2026-04-03T04:57:18Z"
   },
   {
-    "additions": 200,
-    "author": "syncdoth",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44906 ## Summary - Remove `.expand_as(inputs_embeds)` from placeholder mask creation in `get_placeholder_mask` and equivalent inline patterns across all VLM models. `masked_scatter` natively broadcasts `(B, S, 1)` \u2192 `(B, S, H)`, mak\u2026",
-    "changed_files": 71,
+    "additions": 131,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Makes sure the full error is displayed on errors",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44907",
-    "created_at": "2026-03-21T06:07:35Z",
-    "deletions": 222,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45163",
+    "created_at": "2026-04-01T13:41:18Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44907/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44907",
+    "files_url": "https://github.com/huggingface/transformers/pull/45163/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45163",
     "labels": [],
     "merged": false,
-    "number": 44907,
+    "number": 45163,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Remove unnecessary expand_as in get_placeholder_mask across VLMs",
-    "updated_at": "2026-03-23T12:20:03Z"
+    "title": "tweak checkers output on errors",
+    "updated_at": "2026-04-01T14:30:59Z"
   },
   {
-    "additions": 13,
-    "author": "NicoleRobin",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - 13 i18n README files used `./awesome-transformers.md` which resolves relative to the `i18n/` directory and leads to a 404 - Replace with the absolute GitHub URL so links work from any location - `README_ko.md` was already corr\u2026",
-    "changed_files": 13,
+    "additions": 513,
+    "author": "onwp",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description Re-creates the Turkish documentation that was accidentally removed during the TF/Flax cleanup (commit fce74651). This PR adds the foundational Turkish docs with the complete \"Get Started\" section. ### Files added - `docs/sou\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44905",
-    "created_at": "2026-03-21T03:25:56Z",
-    "deletions": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45159",
+    "created_at": "2026-04-01T05:15:32Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44905/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44905",
+    "files_url": "https://github.com/huggingface/transformers/pull/45159/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45159",
+    "labels": [],
+    "merged": false,
+    "number": 45159,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Turkish documentation: Get Started section",
+    "updated_at": "2026-04-01T05:16:49Z"
+  },
+  {
+    "additions": 525,
+    "author": "onwp",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR re-creates the Turkish (`tr`) documentation for the Transformers library, starting with the **Get Started** section. The original Turkish translation was accidentally removed in commit fce74651 (#40999). This contributio\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45158",
+    "created_at": "2026-04-01T05:05:40Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45158/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45158",
     "labels": [],
     "merged": true,
-    "number": 44905,
+    "number": 45158,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(i18n): replace broken relative links to awesome-transformers.md with absolute URLs",
-    "updated_at": "2026-03-23T12:47:56Z"
+    "title": "Add Turkish (tr) translation for Get Started section",
+    "updated_at": "2026-04-02T17:50:46Z"
   },
   {
-    "additions": 101,
-    "author": "vivekvar-dl",
+    "additions": 190,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add PrismML 1bit (gguf based, group-size 128) model inference suppport. ## Code Agent Policy - [ ] I confirm that this is not a pure code agent PR. ## Before submitting - [ ] This PR fixes a typo or improves the doc\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45157",
+    "created_at": "2026-04-01T03:11:51Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45157/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45157",
+    "labels": [],
+    "merged": false,
+    "number": 45157,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[WIP] PrismML Bonsai model support",
+    "updated_at": "2026-04-02T20:53:12Z"
+  },
+  {
+    "additions": 50,
+    "author": "Cursx",
     "author_association": "NONE",
-    "body_excerpt": "# Fix granite_speech config loading failure with int multiplier fields ## Fixes #44877 ### Problem Loading `granite_speech` configs fails with `StrictDataclassFieldValidationError` when multiplier fields (e.g., `embedding_multiplier`) are\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? After the `merge_and_unload()` operation in PEFT, embed_tokens and lm_head become independent tensors with different values, but config.tie_word_embeddings remains True. The load-side already detects this using torc\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44904",
-    "created_at": "2026-03-21T03:12:37Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45156",
+    "created_at": "2026-04-01T02:36:38Z",
     "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44904/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44904",
-    "labels": [
-      "Code agent slop"
-    ],
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45156/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45156",
+    "labels": [],
     "merged": false,
-    "number": 44904,
+    "number": 45156,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(granite_speech): convert int to float for multiplier fields in text_config",
-    "updated_at": "2026-03-23T10:37:38Z"
+    "title": "Fix save_pretrained writing incorrect tie_word_embeddings=True config after PEFT merge",
+    "updated_at": "2026-04-01T09:34:05Z"
   },
   {
-    "additions": 16,
-    "author": "yonigozlan",
+    "additions": 22,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some remote code models are using `get_size_dict` directly, and now that size is converted to SizeDict in init, we need to support it as input in `get_size_dict`",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44903",
-    "created_at": "2026-03-21T01:25:53Z",
-    "deletions": 7,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45155",
+    "created_at": "2026-03-31T22:23:06Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44903/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44903",
+    "files_url": "https://github.com/huggingface/transformers/pull/45155/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45155",
     "labels": [],
-    "merged": true,
-    "number": 44903,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Support SizeDict import in get_size_dict",
-    "updated_at": "2026-03-23T10:28:52Z"
+    "merged": false,
+    "number": 45155,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "feat: adapter loading with TP",
+    "updated_at": "2026-03-31T22:34:45Z"
   },
   {
-    "additions": 3,
-    "author": "guoyangzhen",
-    "author_association": "NONE",
-    "body_excerpt": "## Problem `_split_tokens_on_unicode()` crashes with `IndexError: string index out of range` when the decoded token stream ends with a dangling Unicode replacement character (\\uFFFD). The computed index `unicode_offset + decoded.index(repl\u2026",
-    "changed_files": 1,
+    "additions": 676,
+    "author": "FaizanImran-blip",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed bug #45072 / #45071 where PretrainedConfig type checking and Pydantic model field validation were broken in v5.4.0. Added proper type checking for 'num_labels'. Added unit tests in test.py to verify correct and incorrect types. Verif\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44902",
-    "created_at": "2026-03-20T22:08:49Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45154",
+    "created_at": "2026-03-31T20:28:32Z",
+    "deletions": 153,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44902/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44902",
+    "files_url": "https://github.com/huggingface/transformers/pull/45154/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45154",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44902,
+    "number": 45154,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Whisper word timestamp OOB access on trailing replacement char",
-    "updated_at": "2026-03-23T11:59:14Z"
+    "title": "Pretrained-config bug(45072/huggingfacebug)",
+    "updated_at": "2026-04-06T20:48:30Z"
   },
   {
-    "additions": 19,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing Perceiver use case was identified and fixed in this PR: \u2192 c6d2848a23 ([\ud83d\udea8 Fix torch.jit.trace for interpolate_pos_encoding in all vision models](https://github.com/huggingface/transformers/pul\u2026",
-    "changed_files": 2,
+    "additions": 502,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, with torch releasing the varlen API, we can somewhat use native FA (with limited feature support) Restrictions - Unsupported features - Dropout - Learnable sinks (attention sinks) - Determinism - Softcap - CB KV cache native\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44899",
-    "created_at": "2026-03-20T20:02:10Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44899/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44899",
-    "labels": [],
-    "merged": true,
-    "number": 44899,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "fix(models): Fix Perceiver interpolate_pos_encoding interpolating to the source size",
-    "updated_at": "2026-03-25T11:54:23Z"
-  },
-  {
-    "additions": 14,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add compatibility with remote code importing image_processing_utils_fast modules and methods using `from transformers.image_processing_utils_fast import ...`",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44897",
-    "created_at": "2026-03-20T19:30:32Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44897/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44897",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45153",
+    "created_at": "2026-03-31T19:43:19Z",
+    "deletions": 386,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45153/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45153",
     "labels": [],
-    "merged": true,
-    "number": 44897,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add backward compatibility for direct imports from legacy `image_processing_utils_fast`",
-    "updated_at": "2026-03-20T20:00:12Z"
+    "merged": false,
+    "number": 45153,
+    "review_comments_count": 25,
+    "state": "open",
+    "title": "[`FA`] Native torch integration",
+    "updated_at": "2026-04-01T20:01:16Z"
   },
   {
-    "additions": 354,
+    "additions": 389,
     "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the continuous batching docs - new page for the API reference - adds sections for new features like CUDA graphs, async batching, prefix caching, logprobs (depending on when its merged) - clearer example of generation with varying l\u2026",
-    "changed_files": 4,
+    "body_excerpt": "refactors the testing docs to be more contributor-facing organized around writing model tests instead of being a collection of pytest usage examples and CI maintenance. also updates the pr checks doc so contributors are better prepared to\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44896",
-    "created_at": "2026-03-20T19:09:41Z",
-    "deletions": 81,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45152",
+    "created_at": "2026-03-31T18:12:09Z",
+    "deletions": 1363,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44896/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44896",
+    "files_url": "https://github.com/huggingface/transformers/pull/45152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45152",
     "labels": [],
     "merged": false,
-    "number": 44896,
+    "number": 45152,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[docs] continuous batching",
-    "updated_at": "2026-03-20T19:31:26Z"
+    "title": "[docs] model testing",
+    "updated_at": "2026-04-03T18:24:13Z"
   },
   {
-    "additions": 57,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR enables static FP8 experts. This also works on multi-gpu with device-map. A fix for that was to set was to set `torch.cuda.set_device()`. Triton's JIT compiler uses he active device context to determine whic\u2026",
+    "additions": 1,
+    "author": "maanas1234",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? The previous code snippet had an error in how `TrainingArguments` was being used, which could lead to confusion or incorrect implementation. Corrected the example to properly demonstrate how to initialize and use `T\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44895",
-    "created_at": "2026-03-20T19:01:35Z",
-    "deletions": 10,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45150",
+    "created_at": "2026-03-31T17:31:37Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44895/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44895",
+    "files_url": "https://github.com/huggingface/transformers/pull/45150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45150",
     "labels": [],
     "merged": true,
-    "number": 44895,
-    "review_comments_count": 4,
+    "number": 45150,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add static FP8 expert support ",
-    "updated_at": "2026-03-24T14:27:31Z"
+    "title": "Fix incorrect TrainingArguments example in training.md",
+    "updated_at": "2026-03-31T18:31:25Z"
   },
   {
-    "additions": 10,
-    "author": "ydshieh",
+    "additions": 4633,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "## Problem `ProcessorMixin.to_dict()` was calling `copy.deepcopy(self.__dict__)` on the entire processor, including the tokenizer, even though the tokenizer is always deleted from the output immediately after (since tokenizers are saved se\u2026",
-    "changed_files": 1,
+    "body_excerpt": "First pass: ~1M+ tokens in, ~115K+ out, Opus mainly, $42, 1h30 # PR #44320 vs Our Implementation ## What we got right - Same overall structure: modular file + generated standalone files + conversion script + tests + docs - Same model direc\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44894",
-    "created_at": "2026-03-20T18:57:53Z",
-    "deletions": 9,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44894/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44894",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45149",
+    "created_at": "2026-03-31T15:56:11Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45149/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45149",
     "labels": [],
-    "merged": true,
-    "number": 44894,
+    "merged": false,
+    "number": 45149,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix `processing_utils.py`: avoid deepcopying tokenizer in `ProcessorMixin` to improve performance",
-    "updated_at": "2026-03-23T10:09:02Z"
+    "state": "open",
+    "title": "DO NOT MERGE adding SAML3-LiteText with a skill, first pass",
+    "updated_at": "2026-04-01T06:28:20Z"
   },
   {
-    "additions": 18,
-    "author": "ai-man-codes",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43011 `StaticLayer` was missing a `.crop(max_length)` method, so implemented that according to the discussion of the issue. Added `StaticLayer.crop(max_length)` to match the API of StaticCache with the Dynami\u2026",
-    "changed_files": 1,
+    "additions": 13,
+    "author": "HallerPatrick",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45146 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review and r\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44893",
-    "created_at": "2026-03-20T17:48:23Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45148",
+    "created_at": "2026-03-31T15:20:02Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44893/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44893",
+    "files_url": "https://github.com/huggingface/transformers/pull/45148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45148",
     "labels": [],
     "merged": false,
-    "number": 44893,
+    "number": 45148,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "add `StaticLayer.crop()` to match `DynamicLayer` API",
-    "updated_at": "2026-03-20T18:08:10Z"
+    "state": "closed",
+    "title": "Allow for all layers in Qwen3.5 architecture to be Gated Deltanet.",
+    "updated_at": "2026-04-02T11:18:12Z"
   },
   {
-    "additions": 51,
-    "author": "he-yufeng",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44821 The `elif is_remote_url(...)` / `download_url(...)` branch in `get_image_processor_dict` was accidentally removed during the image processor refactor in #43514. This caused `AutoImageProcessor.from_pretrained(url)` to break wi\u2026",
-    "changed_files": 5,
+    "additions": 309,
+    "author": "mobicham",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR fixes hqq support that has been broken for a couple of months now after a refactoring: * Online quantization works fine now. * Serialization to load/save HQQ models is fixed too. ## Code Agent Policy - [x] I\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44892",
-    "created_at": "2026-03-20T16:21:25Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45147",
+    "created_at": "2026-03-31T14:59:47Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44892/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44892",
+    "files_url": "https://github.com/huggingface/transformers/pull/45147/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45147",
     "labels": [],
     "merged": false,
-    "number": 44892,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Fix AutoImageProcessor.from_pretrained failing on URL input",
-    "updated_at": "2026-03-24T13:30:38Z"
+    "number": 45147,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix broken HQQ support",
+    "updated_at": "2026-04-07T07:53:33Z"
   },
   {
-    "additions": 507,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add a MoERouterHealthCallback to log MoE router-health metrics. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the\u2026",
-    "changed_files": 7,
+    "additions": 2340,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, this PR aims to add the MiMo-V2-Flash model to the Transformers library Fixes https://github.com/huggingface/transformers/issues/42954 MiMo-V2 is \"The last of the SOTAs\" that isn't natively supported by the T\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44891",
-    "created_at": "2026-03-20T16:17:05Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45144",
+    "created_at": "2026-03-31T13:53:28Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44891/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44891",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45144",
+    "labels": [
+      "New model"
+    ],
     "merged": false,
-    "number": 44891,
+    "number": 45144,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[Trainer] add MoERouterHealthCallback Callback",
-    "updated_at": "2026-03-20T16:28:43Z"
+    "title": "Add Xiaomi MiMo-V2",
+    "updated_at": "2026-04-02T15:43:34Z"
   },
   {
-    "additions": 72,
+    "additions": 102,
     "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "As discussed on Slack, this is the first phase of our approach to controlling the code agent epidemic. This PR places large warnings in both the pull request template and `CONTRIBUTING.md`, which should hopefully be seen by most contributo\u2026",
-    "changed_files": 2,
+    "body_excerpt": "This PR adds `parse_response` to Processor classes by wrapping the `Tokenizer` method! cc @zucchini-nlp",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44890",
-    "created_at": "2026-03-20T16:12:45Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45143",
+    "created_at": "2026-03-31T13:11:49Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44890/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44890",
+    "files_url": "https://github.com/huggingface/transformers/pull/45143/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45143",
     "labels": [],
     "merged": true,
-    "number": 44890,
-    "review_comments_count": 7,
+    "number": 45143,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Add big angry code agent warnings!",
-    "updated_at": "2026-03-23T11:54:48Z"
+    "title": "Add parse_response to Processor, make it a bit more official",
+    "updated_at": "2026-03-31T17:07:16Z"
   },
   {
-    "additions": 86,
-    "author": "roycho96",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Calling `trainer.evaluate()` before `trainer.train()` with DeepSpeed is broken in three ways: 1. **ZeRO-3 stale state crash:** `evaluate()` creates an inference engine. `train()` starts with `accelerator.free_memor\u2026",
+    "additions": 4,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44889",
-    "created_at": "2026-03-20T15:08:32Z",
-    "deletions": 21,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45142",
+    "created_at": "2026-03-31T12:47:46Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44889/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44889",
+    "files_url": "https://github.com/huggingface/transformers/pull/45142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45142",
     "labels": [],
     "merged": false,
-    "number": 44889,
+    "number": 45142,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[DeepSpeed] Fix evaluate()/predict() before train()",
-    "updated_at": "2026-03-21T11:06:07Z"
+    "state": "closed",
+    "title": "refactor(gpt-oss): rename `eager_attention_forward` to `eager_attention_forward_with_sink`",
+    "updated_at": "2026-04-02T16:44:14Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
+    "additions": 19,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? In general, it should be much better to let the kernel do what it wants for perfs! There's no reasons to have troubles from it!",
+    "body_excerpt": "# What does this PR do? Really stupid ... Currently, change a docstring will count as \"modified files\", then the impacted files and test files to run are computed. Say, we add a comment to bert, many files impacted (via dependency) then ma\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44888",
-    "created_at": "2026-03-20T14:45:28Z",
-    "deletions": 22,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45140",
+    "created_at": "2026-03-31T09:41:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44888/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44888",
+    "files_url": "https://github.com/huggingface/transformers/pull/45140/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45140",
     "labels": [],
-    "merged": false,
-    "number": 44888,
+    "merged": true,
+    "number": 45140,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Remove explicit cuda stream in nemotron_h",
-    "updated_at": "2026-03-23T15:14:13Z"
+    "state": "closed",
+    "title": "Fix stupid test fetcher",
+    "updated_at": "2026-03-31T11:06:51Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
+    "additions": 848,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. On currently pinned version, when we run this small snippet (which is called on some model's `__init__` functions \ud83d\ude05): ```python from transformers.integrations.hub_kernels import lazy_load_kernel ca\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? More fixes",
+    "changed_files": 98,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44887",
-    "created_at": "2026-03-20T14:00:33Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45139",
+    "created_at": "2026-03-31T09:26:28Z",
+    "deletions": 886,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44887/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44887",
+    "files_url": "https://github.com/huggingface/transformers/pull/45139/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45139",
     "labels": [],
-    "merged": true,
-    "number": 44887,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Bump kernels version dependency to avoid crashes",
-    "updated_at": "2026-03-20T19:01:51Z"
+    "merged": false,
+    "number": 45139,
+    "review_comments_count": 20,
+    "state": "open",
+    "title": "Fix vllm cis",
+    "updated_at": "2026-04-02T15:44:00Z"
   },
   {
-    "additions": 14,
-    "author": "m-matthias",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Prevent crash in class LwDetrImageLoss when using it with float16 automatic mixed precision on a Cuda device. torch.pow causes an autocast to float32 when used with Cuda, which caused a type mismatch at ``` pos_weig\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is a small T5 expectations update. It is the same for both AMD and NVIDIA A10 GPUs.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44886",
-    "created_at": "2026-03-20T13:56:08Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45138",
+    "created_at": "2026-03-31T08:41:40Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44886/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44886",
+    "files_url": "https://github.com/huggingface/transformers/pull/45138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45138",
     "labels": [],
     "merged": true,
-    "number": 44886,
-    "review_comments_count": 4,
+    "number": 45138,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "LwDetrImageLoss: Fix dtype casting to prevent crash when using amp on cuda device",
-    "updated_at": "2026-03-24T17:02:32Z"
+    "title": "CI] Small T5 expectations updated",
+    "updated_at": "2026-04-02T08:21:25Z"
   },
   {
-    "additions": 2,
-    "author": "guoyangzhen",
+    "additions": 83,
+    "author": "Cursx",
     "author_association": "NONE",
-    "body_excerpt": "## Problem In _split_tokens_on_unicode(), when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD), the computed index can equal len(decoded_full), causing IndexError: string index out of range. The failing\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR fixes a bug in `PreTrainedModel.save_pretrained()` where `config.tie_word_embeddings` can be inconsistent with the actual weight state, leading to silent model corruption for downstream consumers. ### Proble\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44885",
-    "created_at": "2026-03-20T13:03:54Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45136",
+    "created_at": "2026-03-31T06:45:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44885/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44885",
+    "files_url": "https://github.com/huggingface/transformers/pull/45136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45136",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44885,
+    "number": 45136,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: prevent IndexError in Whisper word timestamp decode",
-    "updated_at": "2026-03-23T12:01:50Z"
+    "title": "Fix #45127: Auto-fix diverged tie_word_embeddings config on save to prevent silent weight corruption",
+    "updated_at": "2026-03-31T12:21:32Z"
   },
   {
-    "additions": 14,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some libraries that use Transformers (i.e. vLLM) use `|` on the `size` config. This PR adds `__or__` and `__ror__` so that the following works: ```console $ {\"longest_edge\": 20} | SizeDict(height=10, width=20) {'longest_edge': 20, 'height'\u2026",
+    "additions": 44,
+    "author": "Cursx",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes an issue where PEFT adapters applied independently to tied embeddings (`embed_tokens` and [lm_head](cci:1://file:///d:/transformers/transformers/src/transformers/modeling_utils.py:2858:4-2985:26)) cause silent\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44884",
-    "created_at": "2026-03-20T11:52:15Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45135",
+    "created_at": "2026-03-31T02:28:26Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44884/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44884",
+    "files_url": "https://github.com/huggingface/transformers/pull/45135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45135",
     "labels": [],
-    "merged": true,
-    "number": 44884,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 45135,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing dunder methods to `SizeDict`",
-    "updated_at": "2026-03-20T12:21:12Z"
+    "title": "Fix model saving corruption for dynamically untied embeddings",
+    "updated_at": "2026-03-31T04:36:56Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/44589.",
+    "additions": 91,
+    "author": "milesial",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add support for CUDA parakeet preprocessor, running STFT and mel spectrogram extraction on the GPU. This refactor also speeds up the CPU implementation. Tested on `nvidia/parakeet-ctc-0.6b`, B200, 300s audio: Before\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44883",
-    "created_at": "2026-03-20T11:43:13Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45134",
+    "created_at": "2026-03-31T01:59:28Z",
+    "deletions": 56,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44883/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44883",
+    "files_url": "https://github.com/huggingface/transformers/pull/45134/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45134",
     "labels": [],
-    "merged": true,
-    "number": 44883,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix dtype guessing from state dict",
-    "updated_at": "2026-03-20T13:12:34Z"
+    "merged": false,
+    "number": 45134,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Optimize Parakeet feature extraction on CUDA",
+    "updated_at": "2026-04-03T05:09:37Z"
   },
   {
-    "additions": 1,
+    "additions": 1944,
     "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "fixes ```python model = \"meta-llama/Llama-4-Maverick-17B-128E-Instruct\" tok_auto = AutoTokenizer.from_pretrained(model) print(f\"AutoTokenizer: {tok_auto('hello')}\") ``` ``` The above exception was the direct cause of the following exceptio\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written via HF Inference API guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44882",
-    "created_at": "2026-03-20T11:31:20Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44882/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44882",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45133",
+    "created_at": "2026-03-31T00:00:15Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45133/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45133",
     "labels": [],
     "merged": false,
-    "number": 44882,
+    "number": 45133,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix config type",
-    "updated_at": "2026-03-20T16:34:20Z"
+    "state": "open",
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-31T00:09:59Z"
   },
   {
-    "additions": 142,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, we don't need a weird way to filter out kwargs anymore because now we don't rely on `tokenizer.apply_chat_template`. I didn't delete the unused `TypedDict` yet and will deprecate for at least 3 minor r\u2026",
-    "changed_files": 6,
+    "additions": 215,
+    "author": "akintunero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR fixes GitHub issue #45120: \"Double softmax in MoE router load-balancing loss\". MoE routers in Mixtral, Qwen2MoE, and Qwen3VLMoE were applying softmax inside forward(), then the load_balancing_loss_func applied softmax AG\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44881",
-    "created_at": "2026-03-20T10:44:06Z",
-    "deletions": 82,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45132",
+    "created_at": "2026-03-30T22:45:01Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44881/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44881",
+    "files_url": "https://github.com/huggingface/transformers/pull/45132/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45132",
     "labels": [],
-    "merged": true,
-    "number": 44881,
-    "review_comments_count": 10,
+    "merged": false,
+    "number": 45132,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow arbitrary template kwargs in processors",
-    "updated_at": "2026-03-24T11:14:33Z"
+    "title": "Fix: Remove double softmax in MoE router load-balancing loss (Mixtral, Qwen2MoE, Qwen3VLMoE)",
+    "updated_at": "2026-03-30T23:36:57Z"
   },
   {
-    "additions": 34,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "incorrect model list update",
-    "changed_files": 3,
+    "additions": 231,
+    "author": "yacinemebarki",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes issue #45120: Several MoE routers returned softmaxed probabilities as `router_logits`, which caused `load_balancing_loss_func` to compute softmax(softmax(logits)), flattening routing distributions and weakenin\u2026",
+    "changed_files": 68,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44880",
-    "created_at": "2026-03-20T10:37:13Z",
-    "deletions": 5,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45131",
+    "created_at": "2026-03-30T21:18:47Z",
+    "deletions": 156,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44880/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44880",
+    "files_url": "https://github.com/huggingface/transformers/pull/45131/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45131",
     "labels": [],
-    "merged": true,
-    "number": 44880,
+    "merged": false,
+    "number": 45131,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "incorrect model list update",
-    "updated_at": "2026-03-24T09:27:24Z"
+    "state": "open",
+    "title": "Fix MoE routers returning probabilities instead of logits",
+    "updated_at": "2026-04-01T17:51:59Z"
   },
   {
-    "additions": 448,
-    "author": "tarekziade",
+    "additions": 120,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - uses the Makefile as a single source of truth for running QA checks - adds `tomli` so `make` commands can read the `toml` file when needed - adds a `checkers` Python module that wraps and orchestrates all `checks`\u2026",
-    "changed_files": 7,
+    "body_excerpt": "updates the `@auto_docstring` docs as part of the process of improving the model addition docs - updates title to be more precise as it can mean documenting a model in a `model.md` file as well - flat hierarchy before \u2192 each component-type\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44879",
-    "created_at": "2026-03-20T10:24:29Z",
-    "deletions": 90,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45130",
+    "created_at": "2026-03-30T20:14:33Z",
+    "deletions": 152,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44879/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44879",
+    "files_url": "https://github.com/huggingface/transformers/pull/45130/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45130",
     "labels": [],
-    "merged": true,
-    "number": 44879,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "refactor: unify QA calls",
-    "updated_at": "2026-03-25T08:51:30Z"
+    "merged": false,
+    "number": 45130,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "[docs] @auto_docstring decorator",
+    "updated_at": "2026-04-01T14:32:24Z"
   },
   {
-    "additions": 8,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `check_docstrings` has been complaining for a while about those.",
+    "additions": 5,
+    "author": "IgnazioDS",
+    "author_association": "NONE",
+    "body_excerpt": "## Problem Fixes #45070. `PreTrainedConfig.dtype` was annotated as `Union[str, \"torch.dtype\"] | None`. Since `torch` is only imported under `TYPE_CHECKING`, pydantic's schema builder encounters the `\"torch.dtype\"` forward reference at runt\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44878",
-    "created_at": "2026-03-20T10:01:08Z",
-    "deletions": 8,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45129",
+    "created_at": "2026-03-30T19:13:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44878/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44878",
-    "labels": [],
-    "merged": true,
-    "number": 44878,
+    "files_url": "https://github.com/huggingface/transformers/pull/45129/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45129",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45129,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix nemotron config docstrings",
-    "updated_at": "2026-03-20T10:11:04Z"
+    "title": "fix(config): annotate PreTrainedConfig.dtype as Any to fix pydantic schema generation (#45070)",
+    "updated_at": "2026-03-31T12:33:39Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 2,
+    "additions": 443,
+    "author": "FaizanImran-blip",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes a crash in `_process_kwargs_parameters` that occurred when a module uses `from __future__ import annotations`. - Prevents AttributeError crash. - Adds a test `test_future_annotations.py` to verify the fix works. No other func\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44876",
-    "created_at": "2026-03-20T09:49:54Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45128",
+    "created_at": "2026-03-30T19:10:48Z",
+    "deletions": 95,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44876/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44876",
+    "files_url": "https://github.com/huggingface/transformers/pull/45128/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45128",
     "labels": [],
-    "merged": true,
-    "number": 44876,
+    "merged": false,
+    "number": 45128,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix nemotron_h modular",
-    "updated_at": "2026-03-20T10:00:35Z"
+    "state": "open",
+    "title": "Fix: handle future annotations in _process_kwargs_parameters",
+    "updated_at": "2026-03-31T10:44:40Z"
   },
   {
-    "additions": 872,
+    "additions": 170,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Refactors `src/transformers/cli/serve.py` to reduce nesting depth, eliminate code duplication, and improve maintainability. No behavioral changes and the public API is unchanged. Also added a module docstring to exp\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This patch adds exponential back-off for `load_audio_librosa` / `load_audio_as` / `load_audio_torchcodec` when they try to download a file. That generic utility is also used within `hub_retry` notice that there were\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44875",
-    "created_at": "2026-03-20T09:06:34Z",
-    "deletions": 701,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44875/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44875",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45126",
+    "created_at": "2026-03-30T16:34:40Z",
+    "deletions": 38,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45126/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45126",
     "labels": [],
     "merged": false,
-    "number": 44875,
+    "number": 45126,
     "review_comments_count": 0,
     "state": "open",
-    "title": "refactor: improved the cli server module code organization",
-    "updated_at": "2026-03-23T08:08:17Z"
+    "title": "http retries on audio file downloads",
+    "updated_at": "2026-03-30T17:08:33Z"
   },
   {
-    "additions": 2,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "`Llama4`'s was incorrect and causing `StrictDataclassFieldValidationErrors`. `AFMoE`'s was was fine but now it's more specific.",
+    "additions": 3,
+    "author": "danielquintas8",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds `_tp_plan = {\"lm_head\": \"colwise_gather_output\"}` to `Qwen3_5MoeForConditionalGeneration` (the VL wrapper class). The text-only `Qwen3_5MoeForCausalLM` already had `_tp_plan`, but the VL variant was missing it.\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44874",
-    "created_at": "2026-03-20T09:05:02Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45124",
+    "created_at": "2026-03-30T16:23:11Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44874/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44874",
+    "files_url": "https://github.com/huggingface/transformers/pull/45124/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45124",
     "labels": [],
     "merged": true,
-    "number": 44874,
+    "number": 45124,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `layer_types` type hint for `AFMoE` and `Llama4`",
-    "updated_at": "2026-03-20T12:03:58Z"
+    "title": "[Qwen3.5 MoE] Add _tp_plan to ForConditionalGeneration",
+    "updated_at": "2026-04-02T14:10:01Z"
   },
   {
-    "additions": 75,
-    "author": "sergiopaniego",
+    "additions": 1,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? ## Problem Online RL training (GRPO, RLOO, PPO) with all VL models using MRoPE with rope_deltas (Qwen2-VL, Qwen2.5-VL, Qwen3-VL, Qwen3.5, GLM4V, PaddleOCR-VL, Ernie4.5-VL-MoE, etc.) crashes with `RuntimeError: Sizes\u2026",
-    "changed_files": 15,
+    "body_excerpt": "The `test_ocr_queries` assertion value was wrong, even at the initial commit! I'm not sure how tests passed at the time but they're failing now in the CI. This PR fixes the target value!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44873",
-    "created_at": "2026-03-20T08:38:03Z",
-    "deletions": 30,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45123",
+    "created_at": "2026-03-30T15:48:42Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44873/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44873",
+    "files_url": "https://github.com/huggingface/transformers/pull/45123/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45123",
     "labels": [],
     "merged": true,
-    "number": 44873,
+    "number": 45123,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix VL model rope_deltas batch size mismatch in online RL training",
-    "updated_at": "2026-03-20T13:51:08Z"
+    "title": "Fix PP test_ocr_queries",
+    "updated_at": "2026-03-30T16:28:38Z"
   },
   {
-    "additions": 2,
-    "author": "IvanFan-Van",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Update outdated comment that references non-existent file `generation_utils_samplers.py` ## Changes Detail - The comment on line 1200 states \"all samplers can be found in `generation_utils_samplers.py`\" - In reality, all sam\u2026",
-    "changed_files": 1,
+    "additions": 106,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Native model integration should never have anything related to remote code execution: That just means that we would need to add that model natively, not via remote - it can and will not be maintained by us. Additionally added a new linter\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44872",
-    "created_at": "2026-03-20T05:45:46Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45122",
+    "created_at": "2026-03-30T15:40:30Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44872/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44872",
-    "labels": [],
-    "merged": false,
-    "number": 44872,
+    "files_url": "https://github.com/huggingface/transformers/pull/45122/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45122",
+    "labels": [
+      "for patch"
+    ],
+    "merged": true,
+    "number": 45122,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: Update outdated sampler comment in generation/utils.py",
-    "updated_at": "2026-03-20T05:45:46Z"
+    "state": "closed",
+    "title": ":rotating_light: [`LightGlue`] Remove remote code execution",
+    "updated_at": "2026-03-31T12:11:19Z"
   },
   {
-    "additions": 666,
-    "author": "JonusClapshaw",
+    "additions": 56,
+    "author": "orbisai0security",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #42200 `prediction_step` is type-hinted to return `Optional[torch.Tensor]` for logits, but when no `preprocess_logits_for_metrics` is provided it could return a tuple instead of a tensor. This caused `torch_pa\u2026",
-    "changed_files": 33,
+    "body_excerpt": "## Summary Fix critical severity security issue in `src/transformers/cli/serve.py`. ## Vulnerability | Field | Value | |-------|-------| | **ID** | V-007 | | **Severity** | CRITICAL | | **Scanner** | multi_agent_ai | | **Rule** | `V-007` |\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44870",
-    "created_at": "2026-03-20T02:28:27Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45121",
+    "created_at": "2026-03-30T15:02:06Z",
     "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44870/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44870",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45121/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45121",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44870,
+    "number": 45121,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: ensure prediction_step returns tensor for logits, not tuple #42200",
-    "updated_at": "2026-03-20T17:51:19Z"
+    "title": "fix: remove unsafe exec() in serve.py",
+    "updated_at": "2026-03-31T12:09:59Z"
   },
   {
-    "additions": 98,
-    "author": "sdharani91",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #44717 This PR fixes packed-sequence handling for the Qwen3.5 linear-attention fast path. Before this change, Qwen3.5 produced different outputs for: a padded representation of multiple sequences a packed repr\u2026",
-    "changed_files": 3,
+    "additions": 39,
+    "author": "akintunero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR fixes GitHub issue #45071: \"v5.4.0 breaks PretrainedConfig type checking\". The regression prevents type checkers (mypy, pyright) from validating `PretrainedConfig` subclass instantiation with valid parameters. ## Root Ca\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44867",
-    "created_at": "2026-03-19T17:31:45Z",
-    "deletions": 5,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45119",
+    "created_at": "2026-03-30T13:48:47Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44867/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44867",
+    "files_url": "https://github.com/huggingface/transformers/pull/45119/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45119",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44867,
+    "number": 45119,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels",
-    "updated_at": "2026-03-20T13:15:15Z"
+    "title": "Fix: Preserve PreTrainedConfig __init__ signatures for type checkers (fixes #45071)",
+    "updated_at": "2026-03-31T12:17:09Z"
   },
   {
-    "additions": 78,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 3,
+    "additions": 194,
+    "author": "sirzechs66",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44866",
-    "created_at": "2026-03-19T17:27:58Z",
-    "deletions": 75,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45118",
+    "created_at": "2026-03-30T13:10:36Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44866/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44866",
+    "files_url": "https://github.com/huggingface/transformers/pull/45118/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45118",
     "labels": [],
-    "merged": true,
-    "number": 44866,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Align lfm2 cache to other mamba caches",
-    "updated_at": "2026-03-20T10:50:28Z"
+    "merged": false,
+    "number": 45118,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366, supersedes #43757)",
+    "updated_at": "2026-04-06T17:45:19Z"
   },
   {
-    "additions": 496,
-    "author": "tarekziade",
+    "additions": 20,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Added Rule 11 forward() must not access non-nn.Module attributes on submodules (breaks pipeline parallelism with Identity replacement). we want to make sure we just use metadata in config and elesewere when in that\u2026",
-    "changed_files": 10,
+    "body_excerpt": "Fixes #45084. We need to resolve the chat template in the Voxtral code to avoid `None` being passed to `_get_template_variables()`! cc @zucchini-nlp, follow-up to #44881",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44865",
-    "created_at": "2026-03-19T16:39:59Z",
-    "deletions": 26,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45117",
+    "created_at": "2026-03-30T12:34:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44865/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44865",
+    "files_url": "https://github.com/huggingface/transformers/pull/45117/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45117",
     "labels": [],
-    "merged": true,
-    "number": 44865,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "chore(typing): added rule 11",
-    "updated_at": "2026-03-23T12:29:21Z"
+    "merged": false,
+    "number": 45117,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Copy the template resolution logic from the base apply_chat_template to Voxtral",
+    "updated_at": "2026-03-30T13:35:54Z"
   },
   {
-    "additions": 99,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR switches FP8 per-tensor implementation to rely on the official torch impl `torch._scaled_mm`. Note that `torch._scaled_mm` don't explicitly support per tensor. We hack the api a bit as it only support per ro\u2026",
-    "changed_files": 1,
+    "additions": 222,
+    "author": "sirzechs66",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds full GGUF loading support for GPT\u2011OSS models (20B/120B). It allows Transformers (and consequently vLLM) to directly load GPT\u2011OSS GGUF files without falling back to a wrong architecture. The changes incl\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44864",
-    "created_at": "2026-03-19T16:19:53Z",
-    "deletions": 12,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44864/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44864",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45116",
+    "created_at": "2026-03-30T12:18:41Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45116/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45116",
     "labels": [],
     "merged": false,
-    "number": 44864,
+    "number": 45116,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Switch FP8 per tensor quant to use `torch._scaled_mm`",
-    "updated_at": "2026-03-20T19:05:05Z"
+    "title": "Add full GGUF loading support for GPT\u2011OSS (fixes #43366)",
+    "updated_at": "2026-03-30T16:50:24Z"
   },
   {
-    "additions": 19,
-    "author": "gh-wf",
-    "author_association": "NONE",
-    "body_excerpt": "Some models (e.g. Nemotron-H) define `_tied_weights_keys` as a list, which caused `AttributeError: 'list' object has no attribute 'keys'` when calling `save_pretrained` during full finetuning. # What does this PR do? `_get_tied_weight_keys\u2026",
-    "changed_files": 2,
+    "additions": 327,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44862",
-    "created_at": "2026-03-19T15:14:12Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44862/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44862",
-    "labels": [
-      "Code agent slop"
-    ],
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45115",
+    "created_at": "2026-03-30T12:09:31Z",
+    "deletions": 375,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45115/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45115",
+    "labels": [],
     "merged": false,
-    "number": 44862,
+    "number": 45115,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle list-type _tied_weights_keys in _get_tied_weight_keys",
-    "updated_at": "2026-03-20T09:47:09Z"
+    "state": "open",
+    "title": "Refactor/nemotron h inherit granitemoehybrid",
+    "updated_at": "2026-03-30T12:23:34Z"
   },
   {
-    "additions": 11,
-    "author": "Cyrilvallez",
+    "additions": 16,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. As discussed offline with the Mistral team, the scaling applied to the query should not be the absolute one (old `cache_position`), but the actual `position_ids`, taking into account padding, packe\u2026",
+    "body_excerpt": "# What does this PR do? This patch fixes all doctests for the run_doctest job",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44860",
-    "created_at": "2026-03-19T14:27:33Z",
-    "deletions": 17,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45114",
+    "created_at": "2026-03-30T11:08:02Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44860/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44860",
+    "files_url": "https://github.com/huggingface/transformers/pull/45114/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45114",
     "labels": [],
-    "merged": true,
-    "number": 44860,
+    "merged": false,
+    "number": 45114,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Mistral] Fix query scaling for Mistral4 and Ministral3",
-    "updated_at": "2026-03-19T18:02:06Z"
+    "state": "open",
+    "title": "fix: lets fix all doctests",
+    "updated_at": "2026-03-30T11:30:04Z"
   },
   {
-    "additions": 7001,
-    "author": "philippguevorguian",
-    "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 19,
+    "additions": 181,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds GPU Direct Storage (GDS) support for safetensors model loading via `torch.cuda.gds.GdsFile`. GDS is disabled by default, `HF_ENABLE_GDS=1` env is used to enable it. ## Benchmark A100 PCIe 40GB, Samsung\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44859",
-    "created_at": "2026-03-19T13:54:19Z",
-    "deletions": 138,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45113",
+    "created_at": "2026-03-30T10:55:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44859/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44859",
+    "files_url": "https://github.com/huggingface/transformers/pull/45113/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45113",
     "labels": [],
     "merged": false,
-    "number": 44859,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor: rope in model, flatten vision, rely on qwen3 backone, misc changes",
-    "updated_at": "2026-03-19T14:08:01Z"
+    "number": 45113,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add GDS support for safetensors loading ",
+    "updated_at": "2026-03-31T15:17:16Z"
   },
   {
-    "additions": 111,
+    "additions": 233,
     "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "Right now, the continuous batching tests all use similar mechanisms, namely: 1. loading a model and a tokenizer 2. preparing data for generate or generate_batch 3. running generate to compare its outputs with generate_batch This PR adds 3\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This PR adds a warmup phase before generation starts, turned on by default. It allows for better diagnostics and a more representative user experience than without warmup, where the cost of wamup is payed during the first request rather th\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44858",
-    "created_at": "2026-03-19T13:22:04Z",
-    "deletions": 188,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45112",
+    "created_at": "2026-03-30T10:43:32Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44858/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44858",
+    "files_url": "https://github.com/huggingface/transformers/pull/45112/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45112",
     "labels": [],
     "merged": true,
-    "number": 44858,
-    "review_comments_count": 0,
+    "number": 45112,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "[CB] [Minor] Simplify test suite",
-    "updated_at": "2026-03-24T11:44:39Z"
+    "title": "[CB] Add warmup feature",
+    "updated_at": "2026-03-31T07:42:18Z"
   },
   {
-    "additions": 3,
-    "author": "hkc5",
+    "additions": 42,
+    "author": "ionut-anghelina",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Python 3.13's stricter parser fails when there's a comment between the `@torch.jit.script` decorator and the function definition, causing an IndentationError when importing DebertaV2Model. ## Changes - Moved comments before the\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## Summary - Several MoE routers applied softmax inside `forward()` but returned the result as `router_logits`. The `load_balancing_loss_func` then applied softmax **again**, computing the aux loss on `softmax(softmax(logits))` which flatt\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44856",
-    "created_at": "2026-03-19T12:33:00Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45111",
+    "created_at": "2026-03-30T08:23:07Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44856/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44856",
+    "files_url": "https://github.com/huggingface/transformers/pull/45111/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45111",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44856,
+    "number": 45111,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: move comments before @torch.jit.script decorator for Python 3.13 compatibility",
-    "updated_at": "2026-03-19T13:11:44Z"
+    "title": "Fix double softmax in MoE router load-balancing loss",
+    "updated_at": "2026-03-30T14:09:10Z"
   },
   {
-    "additions": 63,
-    "author": "ydshieh",
+    "additions": 7035,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We had (flaky) ```bash tests/models/nemotron_h/test_modeling_nemotron_h.py::NemotronHModelTest::test_sdpa_can_compile_dynamic Fatal Python error: Segmentation fault ``` `NemotronHBlock.forward` creates a temporary `\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? [disclaimer: PR was entirely written by Codex where I just nudge it in the right directions, similar to #44285] ### Feature request I'd like to add support for Meta's [SAM 3.1](https://huggingface.co/facebook/sam3.1\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44854",
-    "created_at": "2026-03-19T10:54:36Z",
-    "deletions": 56,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45110",
+    "created_at": "2026-03-30T08:19:42Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44854/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44854",
+    "files_url": "https://github.com/huggingface/transformers/pull/45110/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45110",
     "labels": [],
-    "merged": true,
-    "number": 44854,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Fix core dumped when `NemotronH` is torch compiled",
-    "updated_at": "2026-03-20T14:29:16Z"
+    "merged": false,
+    "number": 45110,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add SAM 3.1",
+    "updated_at": "2026-03-30T12:33:41Z"
   },
   {
-    "additions": 99,
-    "author": "sergiopaniego",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? `Zamba2MambaMixer.__init__` calls `lazy_load_kernel(\"mamba-ssm\")` and `lazy_load_kernel(\"causal-conv1d\")` unconditionally. Models that inherit from it (like NemotronH) and set `use_mamba_kernels=False` in their conf\u2026",
-    "changed_files": 3,
+    "additions": 42,
+    "author": "aws-zhanxun",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `T5Attention.forward` hard-codes `n_heads` and `inner_dim` in `view()` calls. When using PyTorch Tensor Parallelism, `ColwiseParallel` shards the q/k/v projection output dim from `inner_dim` to `inner_dim / tp_size`\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44853",
-    "created_at": "2026-03-19T10:22:40Z",
-    "deletions": 72,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45109",
+    "created_at": "2026-03-30T07:06:19Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44853/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44853",
+    "files_url": "https://github.com/huggingface/transformers/pull/45109/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45109",
     "labels": [],
-    "merged": false,
-    "number": 44853,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Zamba2MambaMixer ignoring use_mamba_kernels=False",
-    "updated_at": "2026-03-23T14:14:40Z"
-  },
+    "merged": true,
+    "number": 45109,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix T5Attention shape mismatch under Tensor Parallelism",
+    "updated_at": "2026-04-01T16:21:32Z"
+  },
   {
-    "additions": 117,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "An eos token can also be a list on most recent models, so this PR allows all `EOS` in config be a list as well. Same for q-lora-rank which apparently can be an explicit `None` for some model Also bring back `layer_type_validation` and add\u2026",
-    "changed_files": 92,
+    "additions": 1,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description Some wav2vec2 models (e.g. audio classification variants) have `vocab_size: null` in their `config.json`. The current type annotation `vocab_size: int = 32` causes `huggingface_hub`'s strict dataclass validation to reject `N\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44851",
-    "created_at": "2026-03-19T09:53:31Z",
-    "deletions": 101,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45108",
+    "created_at": "2026-03-30T03:06:01Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44851/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44851",
+    "files_url": "https://github.com/huggingface/transformers/pull/45108/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45108",
     "labels": [],
-    "merged": true,
-    "number": 44851,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Update some type hints",
-    "updated_at": "2026-03-19T16:30:32Z"
+    "merged": false,
+    "number": 45108,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix `Wav2Vec2Config.vocab_size` type to allow `None`",
+    "updated_at": "2026-04-07T08:51:25Z"
   },
   {
-    "additions": 5,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "See #44458 This is a deep issue tbh - the cross attentions are reshaped into a different shape than the text input leading to a mismatch between batch sizes. This only gets noticed during compile as it is more strict about the concrete sha\u2026",
+    "additions": 1,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Description The `text-to-speech` pipeline crashes when loading models whose `generation_config` contains fields set to `None` (e.g. `use_cache=None`). This is because `TextToAudioPipeline.__init__` blindly copies all generation config v\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44850",
-    "created_at": "2026-03-19T08:36:18Z",
-    "deletions": 6,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44850/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44850",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45107",
+    "created_at": "2026-03-30T02:32:13Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45107/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45107",
     "labels": [],
     "merged": false,
-    "number": 44850,
+    "number": 45107,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[`Mllama`] Fix workaround compile",
-    "updated_at": "2026-03-19T11:58:24Z"
+    "title": "Fix `text-to-speech` pipeline crash when generation config contains `None` values",
+    "updated_at": "2026-04-07T08:52:09Z"
   },
   {
-    "additions": 15,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes these failing [Qwen3OmniModelIntegrationTests](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524756897#step:14:1131) <img width=\"2292\" height=\"161\" alt=\"image\" src=\"https://github.\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 23,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44848",
-    "created_at": "2026-03-19T07:30:39Z",
-    "deletions": 14,
+    "additions": 72,
+    "author": "rpathade",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes `_process_kwargs_parameters` crashing with `AttributeError` when `@auto_docstring` is applied in a module that uses `from __future__ import annotations`. Fixes #45103 ## Root cause `from __future__ import anno\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45105",
+    "created_at": "2026-03-29T23:37:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44848/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44848",
+    "files_url": "https://github.com/huggingface/transformers/pull/45105/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45105",
     "labels": [],
     "merged": false,
-    "number": 44848,
+    "number": 45105,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix failing `Qwen3OmniModelIntegrationTests`",
-    "updated_at": "2026-03-26T07:17:55Z"
+    "title": "Fix @auto_docstring crash with from __future__ import annotations in _process_kwargs_parameters",
+    "updated_at": "2026-03-30T00:20:41Z"
   },
   {
-    "additions": 68,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Activated `anti-slop` action. Enabled checks: - `min-account-age: 30` to catch brand-new throwaway accounts, which are common in automated spam waves. - `max-daily-forks: 7` to catch accounts that fork many reposito\u2026",
+    "additions": 13,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #45103 The `@auto_docstring` decorator crashes at import time when applied to a class in a module that uses `from __future__ import annotations`. This is because `from __future__ import annotations` makes all annotatio\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44847",
-    "created_at": "2026-03-19T07:15:38Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45104",
+    "created_at": "2026-03-29T23:26:49Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44847/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44847",
+    "files_url": "https://github.com/huggingface/transformers/pull/45104/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45104",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45104,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix auto_docstring crash with from __future__ import annotations",
+    "updated_at": "2026-03-30T12:13:20Z"
+  },
+  {
+    "additions": 1664,
+    "author": "HemanthSai7",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Co-authored-by: Vishesht27 This PR adds support for codes for the upcoming Nandi series models. We also appreciate the valuable feedback and thorough review provided by @vasqu and @ArthurZucker \ud83e\udd17\ud83d\ude4f",
+    "changed_files": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45101",
+    "created_at": "2026-03-29T20:35:56Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45101",
     "labels": [],
     "merged": false,
-    "number": 44847,
-    "review_comments_count": 3,
+    "number": 45101,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "ci: add anti-slop action",
-    "updated_at": "2026-03-26T09:21:55Z"
+    "title": "Adding support for Nandi Models",
+    "updated_at": "2026-04-05T09:53:35Z"
   },
   {
-    "additions": 64,
-    "author": "RicardoLee510520",
+    "additions": 1,
+    "author": "code-runner77",
     "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? Updated the DeiT model card to follow the new standardized format: - Replaced verbose paper abstract with concise model description - Added Pipeline and AutoModel usage examples - Renamed \"Usage tips\" to \"Notes\" - U\u2026",
+    "body_excerpt": "Improve wording in accelerator selection documentation # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44846",
-    "created_at": "2026-03-19T06:30:53Z",
-    "deletions": 90,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45100",
+    "created_at": "2026-03-29T17:28:59Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44846/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44846",
+    "files_url": "https://github.com/huggingface/transformers/pull/45100/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45100",
     "labels": [],
     "merged": false,
-    "number": 44846,
+    "number": 45100,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Docs] Update DeiT model card to new format",
-    "updated_at": "2026-03-20T05:30:17Z"
+    "title": "Update accelerator_selection.md",
+    "updated_at": "2026-03-30T13:40:51Z"
   },
   {
-    "additions": 15,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `torch.compile` failure for Mllama after #42848 introduced a new unified attention mask creation path. The root cause is a **torch inductor C++ codegen bug**: when `padding_mask_function` uses advanced tensor\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "zendy199x",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs and issue comments written by code agents. We are currently bottlenecked by our ability to review an\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44845",
-    "created_at": "2026-03-19T06:14:54Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45098",
+    "created_at": "2026-03-29T15:50:04Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44845/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44845",
+    "files_url": "https://github.com/huggingface/transformers/pull/45098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45098",
     "labels": [],
     "merged": false,
-    "number": 44845,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Fix Mllama torch.compile failure caused by new attention mask logic",
-    "updated_at": "2026-03-26T01:10:06Z"
+    "number": 45098,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: incomplete string literal causes syntax error in config docstring checker",
+    "updated_at": "2026-03-30T13:27:17Z"
   },
   {
-    "additions": 8468,
-    "author": "sahilleth",
-    "author_association": "NONE",
-    "body_excerpt": "This PR makes a few small fixes on top of #37875 for the DEIM model: - Ensure `DeimConfig` / `DEIMConfig` and `DeimModel` / `DeimForObjectDetection` are correctly exposed from the `transformers` package. - Fix a configuration docstring lin\u2026",
-    "changed_files": 15,
+    "additions": 771,
+    "author": "baonudesifeizhai",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR extends the InternVL conversion script to support the old `OpenGVLab/InternVL2-1B` and `OpenGVLab/InternVL2-2B` checkpoints. These checkpoints currently rely on remote code and are problematic for downstream\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44844",
-    "created_at": "2026-03-19T05:50:29Z",
-    "deletions": 0,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45097",
+    "created_at": "2026-03-29T05:29:42Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44844/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44844",
+    "files_url": "https://github.com/huggingface/transformers/pull/45097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45097",
     "labels": [],
     "merged": false,
-    "number": 44844,
+    "number": 45097,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix DEIM config export and public API",
-    "updated_at": "2026-03-19T13:18:59Z"
+    "state": "open",
+    "title": "Add old InternVL2-1B/2B support to the InternVL conversion script #45092",
+    "updated_at": "2026-04-02T07:59:13Z"
   },
   {
-    "additions": 26,
-    "author": "omyaaa1",
+    "additions": 5,
+    "author": "hkc5",
     "author_association": "NONE",
-    "body_excerpt": "Reintroduce handling for remote URLs using download_url, which was accidentally removed in recent versions. This restores support for loading image processor configs directly from URLs. Fixes #44821 # What does this PR do? <!-- Congratulat\u2026",
+    "body_excerpt": "## Problem Old remote-code checkpoints (like InternVL2) perform real-tensor operations during model construction (e.g., calling `.item()` on tensors). This causes `RuntimeError: Tensor.item() cannot be called on meta tensors` when models a\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44842",
-    "created_at": "2026-03-19T04:48:58Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45096",
+    "created_at": "2026-03-29T05:13:12Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44842/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44842",
+    "files_url": "https://github.com/huggingface/transformers/pull/45096/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45096",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44842,
+    "number": 45096,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoImageProcessor URL loading regression",
-    "updated_at": "2026-03-19T11:57:50Z"
+    "title": "Fix: Skip meta device initialization for remote code models",
+    "updated_at": "2026-03-30T12:13:08Z"
   },
   {
-    "additions": 1,
-    "author": "zhulinchng",
+    "additions": 100,
+    "author": "HanFa",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fix formatting of code block in weightconverter.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set\u2026",
-    "changed_files": 1,
+    "body_excerpt": "When a config class has been explicitly registered via AutoConfig.register(), it should take precedence over auto_map remote code. Previously, `trust_remote_code=True` with auto_map.AutoConfig in config.json would always load remote code,\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44839",
-    "created_at": "2026-03-19T01:40:58Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45094",
+    "created_at": "2026-03-29T04:21:29Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44839/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44839",
+    "files_url": "https://github.com/huggingface/transformers/pull/45094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45094",
     "labels": [],
     "merged": true,
-    "number": 44839,
+    "number": 45094,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Correct code block formatting in weightconverter.md",
-    "updated_at": "2026-03-19T07:07:13Z"
+    "title": "fix: prefer registered config over remote code in AutoConfig.from_pretrained",
+    "updated_at": "2026-03-31T14:56:49Z"
   },
   {
-    "additions": 41,
-    "author": "xr843",
+    "additions": 6,
+    "author": "hkc5",
     "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44821 \u2014 `AutoImageProcessor.from_pretrained` fails with `OSError: Repo id must be in the form 'repo_name' or 'namespace/repo_name'` when given a URL - The URL handling branch (`is_remote_url` check) in `get_image_proces\u2026",
-    "changed_files": 2,
+    "body_excerpt": "This PR fixes the unexpected behaviour of helper function `_get_feat_extract_output_lengths` in qwen3_omni_moe as reported in #45083. ## Problem The current implementation incorrectly calculates the output length of the convolutional layer\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44838",
-    "created_at": "2026-03-18T23:53:54Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45091",
+    "created_at": "2026-03-29T00:37:06Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44838/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44838",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45091/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45091",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44838,
+    "number": 45091,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoImageProcessor.from_pretrained failing with URL input",
-    "updated_at": "2026-03-19T10:43:53Z"
+    "title": "Fix _get_feat_extract_output_lengths in qwen3_omni_moe",
+    "updated_at": "2026-03-30T12:12:57Z"
   },
   {
-    "additions": 482,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "backfills empty model cards like gptoss and nemotronh with a `model-card.md` skill i created. its pretty minimal at the moment and just includes a brief intro and code examples. let me know if there is anything else we should add!",
-    "changed_files": 12,
+    "additions": 17,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #45084 The `VoxtralProcessor.apply_chat_template` method was calling `_get_template_variables(chat_template)` without first checking if `chat_template` was None. This caused a `TypeError: Can't compile non template nod\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44837",
-    "created_at": "2026-03-18T21:45:31Z",
-    "deletions": 102,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45090",
+    "created_at": "2026-03-29T00:35:00Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44837/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44837",
-    "labels": [],
-    "merged": true,
-    "number": 44837,
+    "files_url": "https://github.com/huggingface/transformers/pull/45090/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45090",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45090,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] model cards",
-    "updated_at": "2026-03-20T22:40:41Z"
+    "title": "Fix TypeError when chat_template is None in VoxtralProcessor",
+    "updated_at": "2026-03-30T12:10:25Z"
   },
   {
-    "additions": 96,
-    "author": "tyler-romero",
+    "additions": 4,
+    "author": "Krishnachaitanyakc",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Pass cu_seqlens derived from packed attention masks to FLA's ShortConvolution and chunk_gated_delta_rule kernels, preventing recurrent state from leaking across sequence boundaries during packed-sequence training. F\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #45003 `_can_set_attn_implementation` and `_can_set_experts_implementation` in `PreTrainedModel` use `sys.modules[cls.__module__]`, which raises `KeyError` when a module has been removed from `sys.modules` at runtime. This\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44836",
-    "created_at": "2026-03-18T20:24:58Z",
-    "deletions": 20,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44836/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44836",
-    "labels": [],
-    "merged": false,
-    "number": 44836,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Add cu_seqlens support to OlmoHybridGatedDeltaNet for packed sequences",
-    "updated_at": "2026-03-19T05:34:43Z"
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45089",
+    "created_at": "2026-03-28T16:44:06Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45089/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45089",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45089,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: use sys.modules.get() to avoid KeyError in modeling_utils",
+    "updated_at": "2026-03-30T14:19:30Z"
   },
   {
-    "additions": 187,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary This PR adds the `return_logprobs` flag to the continuous batching, enabling the user to retrieve the log probabilites of the tokens generated. # Tests Added a test to compare with regular generate and it passes. All tests pass.\u2026",
-    "changed_files": 6,
+    "additions": 6,
+    "author": "knQzx",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "corrects the conv output length calculation in _get_feat_extract_output_lengths which was computing wrong values for the audio encoder. fixes #45083",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44835",
-    "created_at": "2026-03-18T17:48:15Z",
-    "deletions": 83,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45088",
+    "created_at": "2026-03-28T16:40:07Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44835/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44835",
+    "files_url": "https://github.com/huggingface/transformers/pull/45088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45088",
     "labels": [],
-    "merged": true,
-    "number": 44835,
+    "merged": false,
+    "number": 45088,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Add an option to return logprobs",
-    "updated_at": "2026-03-23T18:35:31Z"
+    "title": "fix audio encoder output length formula in qwen3_omni_moe",
+    "updated_at": "2026-03-30T12:53:42Z"
   },
   {
-    "additions": 0,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Oups this slipped through in https://github.com/huggingface/transformers/pull/44833",
-    "changed_files": 7,
+    "additions": 5,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes issue #45071 where mypy type checking was broken for PretrainedConfig subclasses. ## Problem In transformers v5.4.0, the PretrainedConfig class was converted to a dataclass with a wrapper around __init__ to accept arbitrary k\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44834",
-    "created_at": "2026-03-18T17:07:11Z",
-    "deletions": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45087",
+    "created_at": "2026-03-28T16:38:11Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44834/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44834",
-    "labels": [],
-    "merged": true,
-    "number": 44834,
+    "files_url": "https://github.com/huggingface/transformers/pull/45087/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45087",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 45087,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update more modular examples",
-    "updated_at": "2026-03-18T17:18:54Z"
+    "title": "Fix PretrainedConfig type checking with mypy",
+    "updated_at": "2026-03-30T12:12:48Z"
   },
   {
-    "additions": 299,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the examples after the config change (https://github.com/huggingface/transformers/pull/41250), and re-run conversion as in general modelings changed quite a bit in the lib.",
-    "changed_files": 19,
+    "additions": 3,
+    "author": "knQzx",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "the function accesses backend_tokenizer.pre_tokenizer but the tokenizer passed is already the raw rust object, so it should be pre_tokenizer directly. fixes #45081",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44833",
-    "created_at": "2026-03-18T16:35:34Z",
-    "deletions": 590,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45086",
+    "created_at": "2026-03-28T16:37:49Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44833/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44833",
+    "files_url": "https://github.com/huggingface/transformers/pull/45086/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45086",
     "labels": [],
-    "merged": true,
-    "number": 44833,
+    "merged": false,
+    "number": 45086,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix and re-run modular converter on examples",
-    "updated_at": "2026-03-18T17:00:44Z"
+    "state": "open",
+    "title": "fix AttributeError in _patch_mistral_regex",
+    "updated_at": "2026-03-28T16:37:49Z"
   },
   {
-    "additions": 192,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 4,
+    "additions": 7,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes #45072. ## Changes ### SwitchTransformers - Fixed a bug in `SwitchTransformersTop1Router.forward()` where `router_logits` was being reassigned to the max probability values instead of keeping the raw logits from the classifie\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44832",
-    "created_at": "2026-03-18T15:33:15Z",
-    "deletions": 155,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45085",
+    "created_at": "2026-03-28T16:28:27Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44832/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44832",
+    "files_url": "https://github.com/huggingface/transformers/pull/45085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45085",
     "labels": [],
     "merged": false,
-    "number": 44832,
-    "review_comments_count": 10,
-    "state": "open",
-    "title": "DeepGEMM",
-    "updated_at": "2026-03-24T13:39:07Z"
+    "number": 45085,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix dtype mismatches in SwitchTransformers and TimmWrapperModel for bfloat16",
+    "updated_at": "2026-03-30T11:25:14Z"
   },
   {
-    "additions": 2,
-    "author": "zucchini-nlp",
+    "additions": 143,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix loading in Sam3 which currently doesn't match the state dict keys from checkpoint. Adding a correct base model prefix will add it to all state dict keys, making the ckpt load-able <!-- Congratulations! You've ma\u2026",
+    "body_excerpt": "# What does this PR do? This PR updates the conversion script of VidEoMT to convert all remaining checkpoints. Find them here: https://huggingface.co/papers/2602.17807",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44831",
-    "created_at": "2026-03-18T14:50:48Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45082",
+    "created_at": "2026-03-28T14:07:12Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44831/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44831",
+    "files_url": "https://github.com/huggingface/transformers/pull/45082/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45082",
     "labels": [],
-    "merged": true,
-    "number": 44831,
+    "merged": false,
+    "number": 45082,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix loading issue in Sam3",
-    "updated_at": "2026-03-18T15:44:01Z"
+    "state": "open",
+    "title": "[VidEoMT] Update conversion script",
+    "updated_at": "2026-03-28T14:16:54Z"
   },
   {
-    "additions": 3308,
-    "author": "lashahub",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR adds `AudioFlamingoNext` as a separate model name that inherits directly from `MusicFlamingo` #43538 and keeps the same architecture and behavior. Changes: - add `audioflamingonext` model files - register it in the auto mappings -\u2026",
-    "changed_files": 35,
+    "additions": 13,
+    "author": "joaquinhuigomez",
+    "author_association": "NONE",
+    "body_excerpt": "## Root cause The v5.4.0 release converted `PreTrainedConfig` from a regular class to a `@dataclass`. This changes how Pydantic handles it when used as a field type in a `BaseModel`: instead of treating it as an opaque arbitrary type, Pyda\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44830",
-    "created_at": "2026-03-18T14:31:45Z",
-    "deletions": 61,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45080",
+    "created_at": "2026-03-28T12:13:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44830/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44830",
+    "files_url": "https://github.com/huggingface/transformers/pull/45080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45080",
     "labels": [],
     "merged": false,
-    "number": 44830,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Add AudioFlamingoNext model",
-    "updated_at": "2026-03-20T09:14:54Z"
+    "number": 45080,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix PreTrainedConfig as Pydantic field type after dataclass conversion",
+    "updated_at": "2026-03-29T19:36:24Z"
   },
   {
-    "additions": 101,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Last batch finally! Follow up of https://github.com/huggingface/transformers/pull/44759 and many other",
-    "changed_files": 33,
+    "additions": 50,
+    "author": "javierdejesusda",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #35141 When `tie_word_embeddings=False`, calling `resize_token_embeddings()` then `post_init()` overwrites the LM head weights with random values. This happens because `_get_resized_lm_head()` returns a new `\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44828",
-    "created_at": "2026-03-18T13:52:32Z",
-    "deletions": 512,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45079",
+    "created_at": "2026-03-28T00:06:03Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44828/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44828",
+    "files_url": "https://github.com/huggingface/transformers/pull/45079/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45079",
     "labels": [],
     "merged": true,
-    "number": 44828,
+    "number": 45079,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove cache_position in more models (4 and last one)",
-    "updated_at": "2026-03-18T16:02:46Z"
+    "title": "Fix resized LM head weights being overwritten by post_init",
+    "updated_at": "2026-04-02T14:13:31Z"
   },
   {
-    "additions": 80,
-    "author": "3outeille",
+    "additions": 21,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "https://github.com/huggingface/transformers/pull/44825",
-    "changed_files": 6,
+    "body_excerpt": "fixes fallback https://github.com/huggingface/transformers/issues/44993",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44827",
-    "created_at": "2026-03-18T13:36:53Z",
-    "deletions": 14,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45078",
+    "created_at": "2026-03-27T23:06:36Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44827/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44827",
+    "files_url": "https://github.com/huggingface/transformers/pull/45078/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45078",
     "labels": [],
     "merged": false,
-    "number": 44827,
-    "review_comments_count": 6,
+    "number": 45078,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "Fix Mistral4 tests",
-    "updated_at": "2026-03-23T16:47:42Z"
+    "title": "throw error when conversion required",
+    "updated_at": "2026-03-31T10:09:46Z"
   },
   {
-    "additions": 55,
-    "author": "BillionClaw",
+    "additions": 312,
+    "author": "dagecko",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44821 This PR fixes the issue where `AutoImageProcessor.from_pretrained()` was unable to load from a URL (e.g., `https://huggingface.co/.../raw/main/config.json`). The bug was introduced in transformers>=5.3.0. Prior versions (e.g.,\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Re-submission of #45010. Had a problem with my fork and had to delete it, which closed the original PR. Apologies for the noise. @tarekziade @ydshieh I noticed you fixed the critical findings from the original PR, which is great. This resu\u2026",
+    "changed_files": 22,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44826",
-    "created_at": "2026-03-18T12:08:35Z",
-    "deletions": 5,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45077",
+    "created_at": "2026-03-27T22:20:56Z",
+    "deletions": 312,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44826/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44826",
+    "files_url": "https://github.com/huggingface/transformers/pull/45077/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45077",
+    "labels": [],
+    "merged": false,
+    "number": 45077,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "fix: pin 50 unpinned actions to commit SHA, extract 1 secret to env var",
+    "updated_at": "2026-03-27T22:20:56Z"
+  },
+  {
+    "additions": 376,
+    "author": "osman-akkawi",
+    "author_association": "NONE",
+    "body_excerpt": "As Osman Akkawi, I am proud to submit this comprehensive Pull Request which introduces two world-first, unique innovations to the transformers library alongside essential codebase maintenance. This PR transforms how users interact with and\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45076",
+    "created_at": "2026-03-27T20:30:37Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45076",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44826,
+    "number": 45076,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: allow AutoImageProcessor to load from URL",
-    "updated_at": "2026-03-19T13:28:04Z"
+    "title": "Osman-Level Innovations: Hardware-Aware Advisor & Selective Weight Surgery CLI",
+    "updated_at": "2026-04-06T10:49:18Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. cc @3outeille as I know you're looking into it",
-    "changed_files": 1,
+    "additions": 4549,
+    "author": "thisisiron",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds the **DeepSeek-OCR2** model. ### Reference - Arxiv Paper: [DeepSeek-OCR 2: Visual Causal Flow](https://arxiv.org/abs/2601.20552) - Huggingface hub: [deepseek-ai/DeepSeek-OCR-2](https://huggingface.co/deepseek-a\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44825",
-    "created_at": "2026-03-18T12:05:35Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45075",
+    "created_at": "2026-03-27T20:14:27Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44825/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44825",
+    "files_url": "https://github.com/huggingface/transformers/pull/45075/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45075",
+    "labels": [],
+    "merged": false,
+    "number": 45075,
+    "review_comments_count": 15,
+    "state": "open",
+    "title": "Add Deepseek-OCR-2 model",
+    "updated_at": "2026-04-06T12:51:07Z"
+  },
+  {
+    "additions": 12,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following dtype mismatch use cases were identified and fixed in this PR: \u2192 **Switch Transformers:** [7938e91fa](https://github.com/harshaljanjani/transformers/commit/7938e91faabb051f3a001cd39c173d4697c2d81c) r\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45074",
+    "created_at": "2026-03-27T20:02:28Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45074/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45074",
     "labels": [],
     "merged": true,
-    "number": 44825,
-    "review_comments_count": 0,
+    "number": 45074,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[CI] Temporarily skip Mistral4 tests as they almost all fail",
-    "updated_at": "2026-03-18T12:15:34Z"
+    "title": "fix(models): Fix dtype mismatch in SwitchTransformers and TimmWrapperModel",
+    "updated_at": "2026-04-02T13:59:46Z"
   },
   {
-    "additions": 5,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Fixes #44737 The `relative_positional_encoding` function in XLNet was computing all positional encodings on CPU every forward pass because the `torch.arange` calls were missing the `device` parameter. ## Changes - Added devi\u2026",
-    "changed_files": 1,
+    "additions": 1239,
+    "author": "Aravind-11",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "What does this pr do? - Add modular_owlvit.py inheriting CLIP vision/text embeddings, MLP, encoder layer, encoder - Import box IoU helpers from loss_for_object_detection; eager_attention from BERT - Regenerate modeling_owlvit.py via modula\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44824",
-    "created_at": "2026-03-18T11:55:01Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45073",
+    "created_at": "2026-03-27T20:00:41Z",
+    "deletions": 135,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44824/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44824",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45073/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45073",
+    "labels": [],
     "merged": false,
-    "number": 44824,
+    "number": 45073,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(xlnet): add device parameter to relative_positional_encoding",
-    "updated_at": "2026-03-18T13:17:38Z"
+    "state": "open",
+    "title": "Refactor OwlViT to modular Transformers",
+    "updated_at": "2026-03-30T12:38:55Z"
   },
   {
-    "additions": 41,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes #44821 where `AutoImageProcessor.from_pretrained()` couldn't load from a direct URL to a config file. ## Problem When passing a URL like `https://huggingface.co/jinfengxie/BFMS_1014/raw/main/config.json` to `AutoImageProcesso\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "Fr0do",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `TypeError` in `_check_received_keys` (line 919 of `modeling_rope_utils.py`) where `received_keys -= ignore_keys` fails when `ignore_keys` is a `list` instead of a `set`. ## Root cause Model configs (Qwen3.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44823",
-    "created_at": "2026-03-18T11:54:24Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45069",
+    "created_at": "2026-03-27T19:21:01Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44823/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44823",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44823,
+    "files_url": "https://github.com/huggingface/transformers/pull/45069/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45069",
+    "labels": [],
+    "merged": true,
+    "number": 45069,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: AutoImageProcessor from URL loading",
-    "updated_at": "2026-03-18T13:17:48Z"
+    "title": "Fix TypeError in rope validation when ignore_keys is a list",
+    "updated_at": "2026-03-30T11:41:12Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44737 The relative_positional_encoding method in XLNetModel was creating tensors using torch.arange() without specifying device=, causing the entire sinusoidal positional encoding computation to run on CPU every forward pass. Only t\u2026",
+    "additions": 22,
+    "author": "aarushisingh04",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### what does this PR do? this pr enables `trainer.train(resume_from_checkpoint=...)` to accept hugging face hub repository ids. instead of only local paths, users can now pass `user/repo@revision` and the trainer will automatically downlo\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45067",
+    "created_at": "2026-03-27T18:26:03Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45067",
+    "labels": [],
+    "merged": false,
+    "number": 45067,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "feat: trainer resume_from_checkpoint support hub downloads (#43375)",
+    "updated_at": "2026-03-27T18:52:10Z"
+  },
+  {
+    "additions": 305,
+    "author": "osman-akkawi",
+    "author_association": "NONE",
+    "body_excerpt": "As **Osman Akkawi**, I am proud to submit this Pull Request which introduces a world-first, unique feature to the `transformers` library alongside essential codebase maintenance. This PR focuses on one goal: making state-of-the-art models\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44822",
-    "created_at": "2026-03-18T11:48:28Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45066",
+    "created_at": "2026-03-27T17:04:14Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44822/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44822",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45066",
+    "labels": [],
     "merged": false,
-    "number": 44822,
+    "number": 45066,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: XLNet relative_positional_encoding device placement",
-    "updated_at": "2026-03-18T13:17:30Z"
+    "title": "[PR] Unique Enhancement: Transformers Model Advisor & Legacy Cleanup",
+    "updated_at": "2026-03-27T20:31:59Z"
   },
   {
-    "additions": 14,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44811 ## Problem When calling `processor.batch_decode(predicted_ids, skip_special_tokens=False)` with the output from `model.generate()` (without `return_dict_in_generate=True`), the `skip_special_tokens` parameter was being ignored\u2026",
+    "additions": 0,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Removes TensorFlow's `\"TF_CPP_MIN_LOG_LEVEL\"` env var. This is no longer needed since TF/Jax are gone. This `utils/print_env.py` script is being used in CI running tests to print, req useful env vars. ## Code Agent\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44820",
-    "created_at": "2026-03-18T10:57:12Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45065",
+    "created_at": "2026-03-27T16:43:30Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44820/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44820",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45065/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45065",
+    "labels": [],
+    "merged": true,
+    "number": 45065,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Remove unused TensorFlow env var",
+    "updated_at": "2026-03-27T17:24:30Z"
+  },
+  {
+    "additions": 101,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? check modular import can be extremely slow (8mn in CI) we're investigating speeding it up in https://github.com/huggingface/transformers/pull/45046 But we can also shard jobs in CI to mitigate a little bit. This pat\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45064",
+    "created_at": "2026-03-27T16:26:27Z",
+    "deletions": 34,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45064/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45064",
+    "labels": [],
     "merged": false,
-    "number": 44820,
+    "number": 45064,
     "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor: shard checkers",
+    "updated_at": "2026-03-27T17:05:37Z"
+  },
+  {
+    "additions": 176,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds some features that makes serving more efficient. It shouldn't impact `generate_batch` at all: - Per-request result delivery via callbacks (replaces shared queue contention). Added `_request_callbacks` d\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45063",
+    "created_at": "2026-03-27T16:07:43Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45063/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45063",
+    "labels": [],
+    "merged": true,
+    "number": 45063,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "fix(whisper): respect skip_special_tokens in batch_decode",
-    "updated_at": "2026-03-18T13:17:20Z"
+    "title": "CB improvements for serving ",
+    "updated_at": "2026-03-30T18:48:33Z"
   },
   {
-    "additions": 2,
-    "author": "BillionClaw",
+    "additions": 55,
+    "author": "ErenAta16",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description This PR fixes the DeepSeek tokenizer issue (#44779) where tokenization produces incorrect results in v5. ### Problem In transformers v5, the DeepSeek tokenizer (DeepSeek-R1) was producing incorrect results: - Input: \"How are\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This PR adds a regression test for Unicode corruption when decoding `added_tokens` with ByteLevel tokenizers (e.g. GPT-2 family). In affected cases, characters such as `\u010d`, `\u0107`, `\u0111` can decode into control characters (`\\r`, `\\x07`, `\\x11`)\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44819",
-    "created_at": "2026-03-18T10:55:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45062",
+    "created_at": "2026-03-27T15:23:38Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44819/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44819",
+    "files_url": "https://github.com/huggingface/transformers/pull/45062/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45062",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44819,
-    "review_comments_count": 0,
+    "number": 45062,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix(tokenizer): add deepseek_v2 and deepseek_v3 to incorrect hub tokenizer class list",
-    "updated_at": "2026-03-18T14:11:16Z"
+    "title": "Add regression test for ByteLevel added-token Unicode decode corruption",
+    "updated_at": "2026-03-27T23:36:35Z"
   },
   {
-    "additions": 64,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Fixes #44805 When training multimodal models (Qwen3-VL, GLM-4.6V, Qwen3-VL-MoE) with LoRA adapters, the `attention_mask` and `mm_token_type_ids` tensors can have different shapes. This causes an IndexError when the `get_rope\u2026",
-    "changed_files": 4,
+    "additions": 12,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, see https://github.com/huggingface/transformers/pull/42435#issuecomment-4143234736",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44818",
-    "created_at": "2026-03-18T10:46:22Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45061",
+    "created_at": "2026-03-27T15:09:50Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44818/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44818",
+    "files_url": "https://github.com/huggingface/transformers/pull/45061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45061",
     "labels": [
-      "Code agent slop"
+      "for patch"
     ],
-    "merged": false,
-    "number": 44818,
+    "merged": true,
+    "number": 45061,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: resolve mask shape mismatch IndexError in multimodal VL models",
-    "updated_at": "2026-03-18T10:51:43Z"
+    "title": "[`FA`] Fix BC support for a few versions + add deprecation cycle",
+    "updated_at": "2026-03-27T15:37:13Z"
   },
   {
-    "additions": 28,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/97. This PR adds `enable_thinking` to the chat-template kwargs. With this change, `enable_thinking` is treated as a template-level argument in the tokenize=True path, so\u2026",
-    "changed_files": 2,
+    "additions": 54,
+    "author": "ErenAta16",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes a regression where PIL-based image/video processors were incorrectly treated as requiring `torchvision`. As a result, `AutoProcessor` / `AutoImageProcessor` could fail in environments without `torchvision`, even though a vali\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44817",
-    "created_at": "2026-03-18T10:44:11Z",
-    "deletions": 6,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45060",
+    "created_at": "2026-03-27T13:43:38Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44817/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44817",
+    "files_url": "https://github.com/huggingface/transformers/pull/45060/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45060",
     "labels": [],
     "merged": false,
-    "number": 44817,
+    "number": 45060,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix PIL backend fallback when torchvision is unavailable",
+    "updated_at": "2026-03-30T13:50:34Z"
+  },
+  {
+    "additions": 9,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some checkpoints, such as https://huggingface.co/omni-research/Tarsier2-Recap-7b, have the wrong `model_type` in their `config.json`. This PR allows advanced users (vLLM) to pass `model_type` into `AutoConfig.from_pretrained` via `kwargs`\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45058",
+    "created_at": "2026-03-27T13:24:05Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45058/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45058",
+    "labels": [],
+    "merged": true,
+    "number": 45058,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Misc] add enable_thinking to template kwargs",
-    "updated_at": "2026-03-20T14:56:04Z"
+    "title": "Allow advanced users to override `model_type` in `AutoConfig.from_pretrained`",
+    "updated_at": "2026-03-27T14:29:53Z"
   },
   {
-    "additions": 98,
-    "author": "Cyrilvallez",
+    "additions": 318,
+    "author": "NathanHB",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This is the last of many PR to remove the `cache_position`. At this point, all the models were already updated to not use them, and they are fully ignored in all the modelings. So this removes thei\u2026",
-    "changed_files": 57,
+    "body_excerpt": "Change model_dump_json() to model_dump() to avoid double JSON encoding. When using continuous batching with stream=false, the response was being double-encoded as a string instead of returning a proper JSON object. Added a UV script to run\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44816",
-    "created_at": "2026-03-18T10:32:04Z",
-    "deletions": 375,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45057",
+    "created_at": "2026-03-27T13:02:59Z",
+    "deletions": 37,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44816/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44816",
+    "files_url": "https://github.com/huggingface/transformers/pull/45057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45057",
     "labels": [],
     "merged": true,
-    "number": 44816,
-    "review_comments_count": 14,
+    "number": 45057,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[generate] Never use `cache_position` anymore in generation",
-    "updated_at": "2026-03-19T14:18:28Z"
+    "title": "[serving] Fix continuous batching JSON response serialization",
+    "updated_at": "2026-03-31T13:04:32Z"
   },
   {
-    "additions": 135,
+    "additions": 331,
     "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? This is mega long due I wanted to check benches. Its not super super huge but a win is a win",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44815",
-    "created_at": "2026-03-18T09:54:18Z",
-    "deletions": 23,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44815/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44815",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45056",
+    "created_at": "2026-03-27T11:36:10Z",
+    "deletions": 28,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45056/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45056",
     "labels": [],
     "merged": false,
-    "number": 44815,
-    "review_comments_count": 2,
+    "number": 45056,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "Dequant fix",
-    "updated_at": "2026-03-24T14:39:52Z"
+    "title": "[`auto_docstring`] needs to be only run on __doc__ ",
+    "updated_at": "2026-03-29T12:57:04Z"
   },
   {
-    "additions": 2,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #44792 This PR fixes the failing test `test_model_generate_images` for the Janus model. ## Problem When generating images with the Janus model, `generation_config.num_return_sequences` and `generation_config.max_length` can be `None`\u2026",
+    "additions": 3,
+    "author": "vasanthrpjan1-boop",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? When `Trainer` saves a checkpoint for a model that is not a `PreTrainedModel` (e.g. a custom `nn.Module`), it only saves the state dict but not the model config. This means `Model.from_pretrained(ckpt_path)` requir\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44814",
-    "created_at": "2026-03-18T09:51:34Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45055",
+    "created_at": "2026-03-27T11:31:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44814/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44814",
+    "files_url": "https://github.com/huggingface/transformers/pull/45055/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45055",
     "labels": [],
     "merged": false,
-    "number": 44814,
+    "number": 45055,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(janus): handle None values in image generation mode",
-    "updated_at": "2026-03-18T10:42:50Z"
+    "state": "open",
+    "title": "Save model config in Trainer checkpoints for non-PreTrainedModel models",
+    "updated_at": "2026-03-27T11:31:10Z"
   },
   {
-    "additions": 20,
+    "additions": 3,
     "author": "hf-security-analysis[bot]",
-    "author_association": "NONE",
-    "body_excerpt": "Update `.github/workflows/pr-repo-consistency-bot.yml` workflow configuration. cc @ydshieh Closes huggingface/tracking-issues#26",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update `.github/workflows/update_metdata.yml` workflow configuration. cc @tarekziade @ydshieh Closes huggingface/tracking-issues#33",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44813",
-    "created_at": "2026-03-18T09:49:16Z",
-    "deletions": 46,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45054",
+    "created_at": "2026-03-27T11:16:11Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44813/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44813",
+    "files_url": "https://github.com/huggingface/transformers/pull/45054/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45054",
     "labels": [],
-    "merged": false,
-    "number": 44813,
+    "merged": true,
+    "number": 45054,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: update pr-repo-consistency-bot.yml",
-    "updated_at": "2026-03-18T10:08:04Z"
+    "title": "chore: update update_metdata.yml",
+    "updated_at": "2026-03-27T15:57:14Z"
   },
   {
-    "additions": 7,
-    "author": "ydshieh",
+    "additions": 1,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes failing [`XCLIPModelIntegrationTests`](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524760869#step:14:1384). [`self.get_attributes()`](https://github.com/huggingface/transformers/\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45053",
+    "created_at": "2026-03-27T11:11:18Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45053/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45053",
+    "labels": [],
+    "merged": false,
+    "number": 45053,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix failing `XCLIPModelIntegrationTest`",
+    "updated_at": "2026-03-30T17:08:14Z"
+  },
+  {
+    "additions": 1,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some checks (for example, modular checks) really require the installation from PR branch.",
+    "body_excerpt": "# What does this PR do? `.gitignore` was not updated when `mlinter` was refactored",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44812",
-    "created_at": "2026-03-18T09:40:36Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45052",
+    "created_at": "2026-03-27T10:16:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44812/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44812",
+    "files_url": "https://github.com/huggingface/transformers/pull/45052/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45052",
     "labels": [],
     "merged": true,
-    "number": 44812,
+    "number": 45052,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix repo-check bot",
-    "updated_at": "2026-03-18T09:49:50Z"
+    "title": "chore: Fix mlinter cache location",
+    "updated_at": "2026-03-27T10:26:38Z"
   },
   {
-    "additions": 7,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed bare except clause in _safe_convert_tensor function to catch only Exception type.",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fix NaN weights on non-rank-0 FSDP processes by using `zeros_like` instead of `empty_like` in `_move_missing_keys_from_meta_to_device` Follow-up to: - #44473 See related downstream issue in `trl` : - https://github.com/huggingface/trl/issu\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45050",
+    "created_at": "2026-03-27T09:19:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45050",
+    "labels": [],
+    "merged": false,
+    "number": 45050,
+    "review_comments_count": 7,
+    "state": "open",
+    "title": "Fix NaN weights on non-rank-0 FSDP processes",
+    "updated_at": "2026-04-03T09:37:48Z"
+  },
+  {
+    "additions": 6,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/45030 After the config validation, all validations are now run after config is initialized. So this config has been wrong from the beginning but we didn't com\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44809",
-    "created_at": "2026-03-18T05:47:03Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45049",
+    "created_at": "2026-03-27T08:46:46Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44809/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44809",
+    "files_url": "https://github.com/huggingface/transformers/pull/45049/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45049",
     "labels": [
-      "Code agent slop"
+      "for patch"
     ],
-    "merged": false,
-    "number": 44809,
+    "merged": true,
+    "number": 45049,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace bare except with Exception in Fuyu image processing",
-    "updated_at": "2026-03-18T13:17:11Z"
+    "title": "Fix when RoPE params are in kwargs",
+    "updated_at": "2026-03-27T16:28:13Z"
   },
   {
-    "additions": 2759,
-    "author": "zhang-prog",
+    "additions": 4,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 31,
-    "cluster_id": "cluster-43098-11",
+    "body_excerpt": "# What does this PR do? Fixes failing [`SmolLM3IntegrationTest:test_model_3b_long_prompt`](https://github.com/huggingface/transformers/actions/runs/23629638266/job/68826332952#step:14:216). `SmolLM3` has [`do_sample=True` by default](https\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43453-9",
     "cluster_ids": [
-      "cluster-43098-11"
+      "cluster-43453-9"
     ],
     "cluster_role": "member",
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44808",
-    "created_at": "2026-03-18T04:29:07Z",
-    "deletions": 21,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45048",
+    "created_at": "2026-03-27T08:45:26Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44808/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44808",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45048",
+    "labels": [],
     "merged": true,
-    "number": 44808,
-    "review_comments_count": 53,
-    "state": "closed",
-    "title": "[Model] Add PP-OCRv5_server_rec and  PP-OCRv5_mobile_rec models Support",
-    "updated_at": "2026-03-18T20:24:50Z"
-  },
-  {
-    "additions": 0,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The question-answering pipeline was removed in v5.3 per the migration guide, but the Chinese, Korean, and French quicktour docs still listed it as an available pipeline task. This removes those outdated references to avoid confusing users\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44807",
-    "created_at": "2026-03-18T03:41:52Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44807/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44807",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44807,
+    "number": 45048,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs(quicktour): remove question-answering pipeline from quicktour tables",
-    "updated_at": "2026-03-18T15:38:09Z"
+    "title": "Fix failing `SmolLM3IntegrationTest`",
+    "updated_at": "2026-03-27T14:18:05Z"
   },
   {
-    "additions": 23,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes OOM errors when loading models on AMD Strix Halo APUs. ## Problem AMD Strix Halo (Radeon 8060S/8050S) uses unified memory architecture where memory-mapped file loading doesn't work well with the current amdgpu driver. This ca\u2026",
-    "changed_files": 1,
+    "additions": 503,
+    "author": "Akshay404error",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Fixes #44568 \u2014 restores add_special_tokens behavior for mDeBERTa tokenizer and non-persistent buffers in v5 ## What does this PR do? This PR fixes two v5 regressions: 1. `add_special_tokens=True` no longer added BOS/EOS tokens for the `mic\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44806",
-    "created_at": "2026-03-18T03:33:59Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45047",
+    "created_at": "2026-03-27T08:33:01Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44806/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44806",
+    "files_url": "https://github.com/huggingface/transformers/pull/45047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45047",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44806,
+    "number": 45047,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-18T12:30:21Z"
+    "title": "fix: restore add_special_tokens behavior for mDeBERTa tokenizer and n\u2026",
+    "updated_at": "2026-03-27T13:15:58Z"
   },
   {
-    "additions": 137,
-    "author": "stevhliu",
+    "additions": 548,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "updates the peft docs: - a more complete training section with a full code snippet, describe saving behavior, resuming from a checkpoint, and distributed training - adds some undocumented API methods (`delete_adapter`, `active_adapters`) -\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Investigate speedups on modular conversion",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44804",
-    "created_at": "2026-03-18T00:08:54Z",
-    "deletions": 89,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44804/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44804",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45046",
+    "created_at": "2026-03-27T08:31:58Z",
+    "deletions": 42,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45046/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45046",
     "labels": [],
-    "merged": true,
-    "number": 44804,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "[docs] peft",
-    "updated_at": "2026-03-23T17:14:58Z"
+    "merged": false,
+    "number": 45046,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "investigate modular conversion speedups",
+    "updated_at": "2026-03-27T16:33:45Z"
   },
   {
-    "additions": 1341,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? (Finally) add support for checking+fixing both generated files and modular files in `check_auto_docstrings`. Also `auto_docstring` was recently added to configs, and this PR updates `check_auto_docstrings` to suppor\u2026",
-    "changed_files": 244,
+    "additions": 5095,
+    "author": "Lidang-Jiang",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Isolate dependencies, make PIL independant from Torchvision backend Fixes #45042 PR #45029 added `@requires(backends=(\"vision\", \"torch\", \"torchvision\"))` to 67 PIL backend `image_processing_pil_*.py` files. This causes PIL backend class\u2026",
+    "changed_files": 188,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44803",
-    "created_at": "2026-03-17T22:40:45Z",
-    "deletions": 1105,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45045",
+    "created_at": "2026-03-27T08:19:19Z",
+    "deletions": 1270,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44803/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44803",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45045/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45045",
+    "labels": [
+      "for patch"
+    ],
     "merged": true,
-    "number": 44803,
+    "number": 45045,
     "review_comments_count": 24,
     "state": "closed",
-    "title": "Support Modular (!!) + Configs in `check_auto_docstrings`",
-    "updated_at": "2026-03-24T17:59:12Z"
+    "title": "[Bugfix] Remove incorrect torchvision requirement from PIL backend image processors",
+    "updated_at": "2026-03-30T07:25:49Z"
   },
   {
-    "additions": 12,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub fixes: https://github.com/huggingface/transformers/issues/44779, https://github.com/huggingface/transformers/pull/44783",
-    "changed_files": 2,
+    "additions": 35,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix issue in https://github.com/huggingface/transformers/issues/44792. @zucchini-nlp @ydshieh pls help review, thx!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44801",
-    "created_at": "2026-03-17T17:40:25Z",
-    "deletions": 0,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45044",
+    "created_at": "2026-03-27T07:50:21Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44801/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44801",
+    "files_url": "https://github.com/huggingface/transformers/pull/45044/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45044",
     "labels": [],
     "merged": true,
-    "number": 44801,
-    "review_comments_count": 0,
+    "number": 45044,
+    "review_comments_count": 11,
     "state": "closed",
-    "title": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub",
-    "updated_at": "2026-03-19T13:11:54Z"
+    "title": "fix bug for janus model image generation",
+    "updated_at": "2026-04-02T02:46:14Z"
   },
   {
-    "additions": 36,
-    "author": "aayushbaluni",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44488 `CamembertTokenizer` raised `ValueError: too many values to unpack (expected 2)` when loading models like `cjvt/sleng-bert` that provide vocab as a dict `{token: id}` from `tokenizer.json` (BPE format). The tokenize\u2026",
+    "additions": 31,
+    "author": "Lidang-Jiang",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #45003. `_can_set_attn_implementation` and `_can_set_experts_implementation` in `modeling_utils.py` crash with `KeyError` when `cls.__module__` is absent from `sys.modules`. This happens in real-world scenari\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44800",
-    "created_at": "2026-03-17T17:20:35Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45043",
+    "created_at": "2026-03-27T06:07:20Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44800/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44800",
+    "files_url": "https://github.com/huggingface/transformers/pull/45043/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45043",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44800,
+    "number": 45043,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: handle dict vocab in CamembertTokenizer for tokenizer.json (#44488)",
-    "updated_at": "2026-03-18T15:37:54Z"
+    "title": "[Bugfix] Use sys.modules.get() to avoid KeyError in modeling_utils",
+    "updated_at": "2026-03-27T13:02:19Z"
   },
   {
-    "additions": 333,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "updates the Hardware section of the docs for training: - combined CPU/Distributed CPU into a single doc - add more info to the Gaudi doc (mixed precision, torch.compile, distributed training) - add more info to the MPS doc (mixed precision\u2026",
-    "changed_files": 9,
+    "additions": 664,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR addresses the performance issues observed with nn.Conv3d across different PyTorch/cuDNN, such as https://github.com/vllm-project/vllm/pull/27418, https://mp.weixin.qq.com/s/hKRIpB561EdrMY8cbg1hEw. We replace\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44799",
-    "created_at": "2026-03-17T17:19:51Z",
-    "deletions": 627,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45041",
+    "created_at": "2026-03-27T03:50:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44799/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44799",
+    "files_url": "https://github.com/huggingface/transformers/pull/45041/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45041",
     "labels": [],
     "merged": false,
-    "number": 44799,
-    "review_comments_count": 19,
+    "number": 45041,
+    "review_comments_count": 47,
     "state": "open",
-    "title": "[docs] training on specific hardware",
-    "updated_at": "2026-03-23T09:09:32Z"
+    "title": "[inference_fusion] convert conv3d patch embed to linear",
+    "updated_at": "2026-04-02T17:46:04Z"
   },
   {
-    "additions": 17,
-    "author": "divyanks",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 13,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- vision models: @yonigozlan @molbap - CIs: @ydshieh when running tests/models/video_llama_3/test_modeling_video_llama_3.py::VideoLlama3IntegrationTest all fail cause by lm_head.weight is missing.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44798",
-    "created_at": "2026-03-17T16:51:46Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45040",
+    "created_at": "2026-03-27T02:56:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44798/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44798",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45040/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45040",
+    "labels": [],
     "merged": false,
-    "number": 44798,
+    "number": 45040,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Llama3 video fix",
+    "updated_at": "2026-03-27T10:55:26Z"
+  },
+  {
+    "additions": 26,
+    "author": "Lidang-Jiang",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44716 - Propagate `interpolate_pos_encoding` parameter through `PixioModel.forward()`, `PixioBackbone.forward()`, and `PixioEmbeddings.forward()` down to `PixioPatchEmbeddings.forward()` - Follows the same pattern used by\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45039",
+    "created_at": "2026-03-27T02:54:31Z",
+    "deletions": 10,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45039/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45039",
+    "labels": [],
+    "merged": false,
+    "number": 45039,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add MPS (Apple Silicon) example and documentation",
-    "updated_at": "2026-03-18T15:37:09Z"
+    "title": "[Bugfix] Propagate interpolate_pos_encoding through Pixio model",
+    "updated_at": "2026-03-27T12:57:30Z"
   },
   {
     "additions": 1,
-    "author": "vasqu",
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "Depends on #44887 and kernels being version `12.3` Works OOB with little changes! Example script for demonstration: ```python from transformers import AutoModelForCausalLM, AutoTokenizer fa_version = 4 #model_id = \"openai/gpt-oss-20b\" mode\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44797",
-    "created_at": "2026-03-17T15:35:59Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45038",
+    "created_at": "2026-03-27T00:18:35Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44797/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44797",
+    "files_url": "https://github.com/huggingface/transformers/pull/45038/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45038",
     "labels": [],
     "merged": true,
-    "number": 44797,
+    "number": 45038,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`FA4`] Add kernels fallback",
-    "updated_at": "2026-03-20T19:03:24Z"
+    "title": "style was missing sorry @ydshieh :)",
+    "updated_at": "2026-03-27T00:28:23Z"
   },
   {
-    "additions": 5110,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR refactors transformers serve so that it is not in a single file. We split it into multiple files with clear responsabilities. - serve_refactored.py \u2014 only CLI args + wiring - server.py \u2014 FastAPI routes and m\u2026",
-    "changed_files": 12,
+    "additions": 1,
+    "author": "asuryateja",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026_interface docs The `custom_attention` function definition in the attention_interface documentation was missing a colon at the end of the return type annotation, making it invalid Python syntax. # What does this PR do? <!-- Congratulation\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44796",
-    "created_at": "2026-03-17T13:04:06Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45037",
+    "created_at": "2026-03-26T23:48:04Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44796/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44796",
+    "files_url": "https://github.com/huggingface/transformers/pull/45037/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45037",
     "labels": [],
     "merged": false,
-    "number": 44796,
-    "review_comments_count": 18,
+    "number": 45037,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[refactor] Serving into proper modules",
-    "updated_at": "2026-03-25T08:07:27Z"
+    "title": "add missing colon in custom_attention function signature in attention\u2026",
+    "updated_at": "2026-03-27T00:17:55Z"
   },
   {
-    "additions": 771,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds support for a more generic path, aligned with the rest of the loading! model | PR | main ----|-----|---------- \"gdax/Qwen1.5-MoE-A2.7B_gguf\"| 1min 5s |1min 18s",
-    "changed_files": 9,
+    "additions": 15,
+    "author": "matdou",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #45030 Configs like `tiny-random/glm-4v` store `rope_theta` at the top level of `config.json` alongside a `rope_scaling` dict (legacy format). For config classes that don't declare `rope_parameters` as a datac\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44794",
-    "created_at": "2026-03-17T11:35:33Z",
-    "deletions": 400,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44794/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44794",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45036",
+    "created_at": "2026-03-26T23:00:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45036/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45036",
     "labels": [],
     "merged": false,
-    "number": 44794,
+    "number": 45036,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[fix] BC for legacy configs with top-level rope_theta when rope_parameters is set via rope_scaling",
+    "updated_at": "2026-03-28T23:47:33Z"
+  },
+  {
+    "additions": 1,
+    "author": "asuryateja",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "The conversion operations table was missing PermuteForRope. Added it with its reverse (itself), consistent with how other operations are documented. PermuteForRope is self-inverse applying it twice returns the original tensor layout. # Wha\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45035",
+    "created_at": "2026-03-26T21:05:17Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45035/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45035",
+    "labels": [],
+    "merged": true,
+    "number": 45035,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refacto GGUF weight conversion",
-    "updated_at": "2026-03-17T17:03:08Z"
+    "state": "closed",
+    "title": "docs: add PermuteForRope to conversion operations reverse table",
+    "updated_at": "2026-03-26T22:09:53Z"
   },
   {
-    "additions": 40,
-    "author": "BillionClaw",
+    "additions": 113,
+    "author": "sdharani91",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44792 - Handles None values in Janus model's image generation mode. The `generate()` method for image generation had several places where it assumed certain config values would always be set, causing failure\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This is a follow up to https://github.com/huggingface/transformers/pull/44867 This PR fixes Qwen3.5 padding-free packed inputs on the linear-attention fast path by consuming collator-provided packed metadata. The li\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44793",
-    "created_at": "2026-03-17T11:29:47Z",
-    "deletions": 6,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45034",
+    "created_at": "2026-03-26T20:52:51Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44793/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44793",
+    "files_url": "https://github.com/huggingface/transformers/pull/45034/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45034",
     "labels": [],
     "merged": false,
-    "number": 44793,
-    "review_comments_count": 2,
+    "number": 45034,
+    "review_comments_count": 38,
     "state": "open",
-    "title": "fix(janus): Handle None values in image generation mode",
-    "updated_at": "2026-03-18T10:43:24Z"
+    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels from data collator",
+    "updated_at": "2026-04-06T13:12:20Z"
   },
   {
-    "additions": 20,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? AMD Strix Halo APUs (gfx1151) have a driver bug where safetensors mmap doesn't release memory properly with the unified memory architecture. This causes OOM errors when loading models that should fit in memory (e.g.\u2026",
+    "additions": 3,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "In https://github.com/huggingface/transformers/pull/43514, `BaseImageProcessorFast` became `BaseImageProcessor` and `_further_process_kwargs` was renamed to `_standardize_kwargs` This PR adds some BC for the old name of this method.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44791",
-    "created_at": "2026-03-17T10:33:22Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45033",
+    "created_at": "2026-03-26T20:36:40Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44791/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44791",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44791,
+    "files_url": "https://github.com/huggingface/transformers/pull/45033/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45033",
+    "labels": [],
+    "merged": true,
+    "number": 45033,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: disable mmap on Strix Halo APUs to avoid OOM",
-    "updated_at": "2026-03-18T12:33:05Z"
+    "title": "Add BC for `_further_process_kwargs`",
+    "updated_at": "2026-03-26T21:01:32Z"
   },
   {
-    "additions": 72,
-    "author": "tarekziade",
+    "additions": 163,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This patch - adds a simple cache to the model linter so we skip files that did not change and were valid - reworks `Makefile` targets",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? Use multi runners to get new failing tests in a CI run.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44790",
-    "created_at": "2026-03-17T08:54:47Z",
-    "deletions": 19,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45032",
+    "created_at": "2026-03-26T18:48:18Z",
+    "deletions": 67,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44790/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44790",
+    "files_url": "https://github.com/huggingface/transformers/pull/45032/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45032",
     "labels": [],
     "merged": true,
-    "number": 44790,
-    "review_comments_count": 1,
+    "number": 45032,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: added cache to the model linter",
-    "updated_at": "2026-03-24T15:28:29Z"
+    "title": "Use multi runners to check new failing tests in a CI run",
+    "updated_at": "2026-03-26T18:59:08Z"
   },
   {
-    "additions": 21,
-    "author": "ydshieh",
+    "additions": 6,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some configs from the hub have different types.",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? Follow-up for https://github.com/huggingface/transformers/pull/44931, which added weight tying for Camembert. Only the CamembertForMaskedLM class had the right _tied_weights_keys, the CamembertForCausalLM had the in\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44789",
-    "created_at": "2026-03-17T08:41:30Z",
-    "deletions": 21,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45031",
+    "created_at": "2026-03-26T18:28:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44789/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44789",
+    "files_url": "https://github.com/huggingface/transformers/pull/45031/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45031",
     "labels": [],
     "merged": true,
-    "number": 44789,
-    "review_comments_count": 5,
+    "number": 45031,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix config loading issues (type issues)",
-    "updated_at": "2026-03-17T09:44:50Z"
+    "title": "[`fix`] Use the correct _tied_weights_keys for CamembertForCausalLM",
+    "updated_at": "2026-03-26T18:57:29Z"
   },
   {
-    "additions": 0,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The pipeline() docstring included an example using the 'question-answering' task, but this task is not in SUPPORTED_TASKS and will raise an error when used. Remove this outdated example to avoid confusing users following the documentation.\u2026",
-    "changed_files": 1,
+    "additions": 478,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Release workflow is failing",
+    "changed_files": 101,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44788",
-    "created_at": "2026-03-17T08:38:25Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45029",
+    "created_at": "2026-03-26T18:04:37Z",
+    "deletions": 768,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44788/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44788",
+    "files_url": "https://github.com/huggingface/transformers/pull/45029/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45029",
+    "labels": [],
+    "merged": true,
+    "number": 45029,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Fix release full",
+    "updated_at": "2026-03-27T06:34:00Z"
+  },
+  {
+    "additions": 1928,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "how does dtensor works with quantization ? # Verify loading ```python # python verify_loading.py --mode single_gpu # torchrun --nproc_per_node=4 verify_loading.py --mode fsdp # torchrun --nproc_per_node=4 verify_loading.py --mode tp # torc\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45028",
+    "created_at": "2026-03-26T17:50:07Z",
+    "deletions": 1191,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/45028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45028",
     "labels": [],
     "merged": false,
-    "number": 44788,
+    "number": 45028,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs(pipelines): remove outdated question-answering example",
-    "updated_at": "2026-03-23T17:19:33Z"
+    "state": "open",
+    "title": "TP refactor for FSDP + TP integration",
+    "updated_at": "2026-04-07T09:01:40Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The question-answering pipeline was removed in v5.0.0 per MIGRATION_GUIDE_V5.md, but the non-English task guides still referenced it. This updates the Arabic, Chinese, Japanese, and Korean question answering task guides to remove usage of\u2026",
-    "changed_files": 4,
+    "additions": 714,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary This PR adds per-request logits processors and overalls the way CB handles logits processors. It introduces batched logits processing with per-request parameters for continuous batching, enabling each request in a batch to use di\u2026",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44787",
-    "created_at": "2026-03-17T08:24:09Z",
-    "deletions": 66,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45026",
+    "created_at": "2026-03-26T17:00:07Z",
+    "deletions": 202,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44787/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44787",
+    "files_url": "https://github.com/huggingface/transformers/pull/45026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45026",
     "labels": [],
     "merged": true,
-    "number": 44787,
-    "review_comments_count": 0,
+    "number": 45026,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "docs(tasks): remove references to removed question-answering pipeline",
-    "updated_at": "2026-03-17T16:23:50Z"
+    "title": "[CB] Add per-request logits processors",
+    "updated_at": "2026-04-03T16:44:11Z"
   },
   {
-    "additions": 25,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "AMD Strix Halo APUs (gfx1151) experience OOM errors when loading large models via safetensors mmap due to unified memory architecture issues. This fix detects Strix Halo GPUs by checking the GPU architecture name (gfx1151) and forces a CPU\u2026",
+    "additions": 2,
+    "author": "layla1824",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "This PR adds a clarification comment regarding the behavior of rotary_pct. Currently, rotary_pct may reset to its default value (0.25) after reload due to the use of kwargs.pop. This note helps developers better understand this behavior.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44786",
-    "created_at": "2026-03-17T08:17:32Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45025",
+    "created_at": "2026-03-26T16:48:53Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44786/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44786",
+    "files_url": "https://github.com/huggingface/transformers/pull/45025/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45025",
     "labels": [],
     "merged": false,
-    "number": 44786,
+    "number": 45025,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-17T10:29:44Z"
+    "title": "Update configuration_Clarify rotary_pct reset behavior in GPTNeoXConfiggpt_neox.py",
+    "updated_at": "2026-03-27T09:05:48Z"
   },
   {
-    "additions": 307,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026",
+    "additions": 1,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? `tf` and `flax` are long gone (unknown extras generate only warnings so this slipped through the cracks)",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44785",
-    "created_at": "2026-03-17T06:55:31Z",
-    "deletions": 83,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45024",
+    "created_at": "2026-03-26T16:21:30Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44785/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44785",
+    "files_url": "https://github.com/huggingface/transformers/pull/45024/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45024",
     "labels": [],
-    "merged": false,
-    "number": 44785,
+    "merged": true,
+    "number": 45024,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM",
-    "updated_at": "2026-03-17T10:28:06Z"
-  },
-  {
-    "additions": 2,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44783",
-    "created_at": "2026-03-17T05:58:54Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44783/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44783",
-    "labels": [],
-    "merged": false,
-    "number": 44783,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer",
-    "updated_at": "2026-03-17T11:12:52Z"
+    "title": "chore: remove old extras",
+    "updated_at": "2026-03-27T11:16:03Z"
   },
   {
-    "additions": 6,
-    "author": "JiwaniZakir",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026",
-    "changed_files": 1,
+    "additions": 2529,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? **Integration notes:** For now, this integration does not load mel filters from the checkpoint. The original model was trained backpropagating gradients in it, but we saw previously (with parakeet-ctc) that this doe\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44782",
-    "created_at": "2026-03-17T05:11:36Z",
-    "deletions": 7,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45023",
+    "created_at": "2026-03-26T15:58:08Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44782/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44782",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45023/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45023",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
     "merged": true,
-    "number": 44782,
-    "review_comments_count": 0,
+    "number": 45023,
+    "review_comments_count": 13,
     "state": "closed",
-    "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward",
-    "updated_at": "2026-03-19T13:30:48Z"
+    "title": "Add cohere asr",
+    "updated_at": "2026-03-26T22:48:16Z"
   },
   {
-    "additions": 5,
-    "author": "bensons",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "popotest",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44781",
-    "created_at": "2026-03-17T04:59:02Z",
-    "deletions": 2849,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45022",
+    "created_at": "2026-03-26T15:33:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44781/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44781",
+    "files_url": "https://github.com/huggingface/transformers/pull/45022/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45022",
     "labels": [],
     "merged": false,
-    "number": 44781,
+    "number": 45022,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`",
-    "updated_at": "2026-03-26T05:56:54Z"
+    "state": "closed",
+    "title": "Update _config.py",
+    "updated_at": "2026-03-27T13:17:36Z"
   },
   {
-    "additions": 145,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.",
-    "changed_files": 8,
+    "additions": 2,
+    "author": "hf-security-analysis[bot]",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update `.github/workflows/anti-slop.yml` workflow configuration. cc @tarekziade Closes huggingface/tracking-issues#30",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44778",
-    "created_at": "2026-03-16T23:41:29Z",
-    "deletions": 28,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45021",
+    "created_at": "2026-03-26T13:40:18Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44778/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44778",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45021/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45021",
+    "labels": [],
     "merged": false,
-    "number": 44778,
+    "number": 45021,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: correct logic error in is_tiktoken_available function",
-    "updated_at": "2026-03-18T13:15:37Z"
+    "title": "chore: update anti-slop.yml",
+    "updated_at": "2026-03-26T13:53:57Z"
   },
   {
-    "additions": 35,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)",
-    "changed_files": 1,
+    "additions": 21,
+    "author": "javierdejesusda",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44877 Loading `ibm-granite/granite-4.0-1b-speech` fails with `StrictDataclassFieldValidationError` because its config.json stores `embedding_multiplier` and `logits_scaling` as integers (e.g. `12`, `8`), but\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44777",
-    "created_at": "2026-03-16T21:58:40Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45019",
+    "created_at": "2026-03-26T11:19:19Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44777/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44777",
+    "files_url": "https://github.com/huggingface/transformers/pull/45019/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45019",
     "labels": [],
     "merged": true,
-    "number": 44777,
-    "review_comments_count": 1,
+    "number": 45019,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[docs] is_causal feature",
-    "updated_at": "2026-03-17T19:50:43Z"
+    "title": "Fix GraniteConfig type hints to accept int for multiplier fields",
+    "updated_at": "2026-03-27T09:30:17Z"
   },
   {
-    "additions": 0,
-    "author": "stevhliu",
+    "additions": 1973,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? I thought a lot about how to make this dynamic for devs (who add models etc.) and keep static for users. The whole thing of automatically inferring config and model-type based on code runs with AST, so we don't have\u2026",
+    "changed_files": 39,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44776",
-    "created_at": "2026-03-16T20:43:33Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45018",
+    "created_at": "2026-03-26T11:18:13Z",
+    "deletions": 1885,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44776/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44776",
+    "files_url": "https://github.com/huggingface/transformers/pull/45018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45018",
     "labels": [],
-    "merged": true,
-    "number": 44776,
+    "merged": false,
+    "number": 45018,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[fix] mistral 4 docs",
-    "updated_at": "2026-03-16T21:11:29Z"
+    "state": "open",
+    "title": "Dynamic auto mapping (PoC)",
+    "updated_at": "2026-04-02T16:42:41Z"
   },
   {
-    "additions": 177,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026",
+    "additions": 18,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? ### Get the rope operation right Before: NeoX split-half style After: GPT-J/interleaved style(`interleaved=True` same as `is_neox_style=Flase`) the right one ### Get rid of `F.relu` Reason: - `F.relu` works with `ac\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44775",
-    "created_at": "2026-03-16T20:23:29Z",
-    "deletions": 109,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44775/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44775",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45017",
+    "created_at": "2026-03-26T09:21:10Z",
+    "deletions": 28,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45017/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45017",
     "labels": [],
     "merged": false,
-    "number": 44775,
-    "review_comments_count": 0,
+    "number": 45017,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "[docs] n-d parallelism",
-    "updated_at": "2026-03-16T20:28:48Z"
+    "title": "[WIP][Fix] GLM 5 set `apply_rotary_pos_emb` to `is_neox_style=False` && remove `F.relu()`",
+    "updated_at": "2026-03-30T06:52:57Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`",
+    "additions": 64,
+    "author": "inisis",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44774",
-    "created_at": "2026-03-16T18:56:19Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45016",
+    "created_at": "2026-03-26T09:09:41Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44774/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44774",
+    "files_url": "https://github.com/huggingface/transformers/pull/45016/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45016",
     "labels": [],
-    "merged": true,
-    "number": 44774,
+    "merged": false,
+    "number": 45016,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove `is_causal` from `EuroBertConfig`",
-    "updated_at": "2026-03-17T09:33:21Z"
+    "title": "fix: glm5 inference bug",
+    "updated_at": "2026-03-26T12:10:21Z"
   },
   {
-    "additions": 3,
-    "author": "githubnemo",
-    "author_association": "MEMBER",
-    "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc",
-    "changed_files": 3,
+    "additions": 55,
+    "author": "pnehete23",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes a `KeyError` in `_can_set_attn_implementation` and `_can_set_experts_implementation` when a model's module is absent from `sys.modules`. Fixes #45003 ## Root Cause Both `_can_set_attn_implementation` (line 19\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44772",
-    "created_at": "2026-03-16T18:46:13Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45015",
+    "created_at": "2026-03-26T08:52:10Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44772/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44772",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/45015/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45015",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44772,
+    "number": 45015,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "bitsandbytes: Update links and docs",
-    "updated_at": "2026-03-17T15:57:56Z"
+    "state": "closed",
+    "title": "fix: guard sys.modules access in _can_set_attn/experts_implementation",
+    "updated_at": "2026-03-26T12:15:50Z"
   },
   {
-    "additions": 2,
+    "additions": 16,
     "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? wtf",
+    "body_excerpt": "# What does this PR do? #30674 refactors the way we obtain CircleCI test files to run for each job. It always puts [\"tests\"] for `tests_hub`, so each commit of each PR will run it, no matter if there is any change to codebase. Let's reduce\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44771",
-    "created_at": "2026-03-16T18:45:11Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44771/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44771",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45014",
+    "created_at": "2026-03-26T08:52:04Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45014/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45014",
     "labels": [],
-    "merged": false,
-    "number": 44771,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "wtf",
-    "updated_at": "2026-03-16T18:56:00Z"
+    "merged": true,
+    "number": 45014,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Don't run `tests_hub` if no tests found",
+    "updated_at": "2026-03-26T09:32:39Z"
   },
   {
-    "additions": 203,
-    "author": "zucchini-nlp",
+    "additions": 325,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice",
-    "changed_files": 12,
+    "body_excerpt": "# What does this PR do? - Adds a new checker in `make chek-repo` that will `import transformers` and count the number of imported module. - Lazy import of torch when doing `import transformers` The change will reduce the import time from ~\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44770",
-    "created_at": "2026-03-16T18:44:03Z",
-    "deletions": 268,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45013",
+    "created_at": "2026-03-26T07:47:23Z",
+    "deletions": 49,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44770/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44770",
+    "files_url": "https://github.com/huggingface/transformers/pull/45013/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45013",
     "labels": [],
     "merged": true,
-    "number": 44770,
-    "review_comments_count": 1,
+    "number": 45013,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix configs with `@strict`",
-    "updated_at": "2026-03-17T15:39:43Z"
+    "title": "feature: added import complexity checker",
+    "updated_at": "2026-03-31T07:01:08Z"
   },
   {
-    "additions": 145,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026",
-    "changed_files": 8,
+    "additions": 155,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Cache to speed up ast walks, and ast tweak",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44769",
-    "created_at": "2026-03-16T18:40:07Z",
-    "deletions": 28,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45012",
+    "created_at": "2026-03-26T07:10:45Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44769/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44769",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44769,
+    "files_url": "https://github.com/huggingface/transformers/pull/45012/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45012",
+    "labels": [],
+    "merged": true,
+    "number": 45012,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Handle empty lists in video_utils functions",
-    "updated_at": "2026-03-18T13:15:55Z"
+    "title": "refactor: added cache in check_repo",
+    "updated_at": "2026-03-30T06:44:23Z"
   },
   {
-    "additions": 20,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026",
+    "additions": 8,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh Hi, can you help review? Thx!",
     "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45011",
+    "created_at": "2026-03-26T06:27:09Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/45011/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45011",
+    "labels": [],
+    "merged": false,
+    "number": 45011,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "skip 2 invalid test cases for pi0 model",
+    "updated_at": "2026-03-31T06:54:25Z"
+  },
+  {
+    "additions": 75,
+    "author": "dagecko",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Fix: CI/CD Security Vulnerabilities in GitHub Actions Hi! [Runner Guard](https://github.com/Vigilant-LLC/runner-guard), an open-source CI/CD security scanner by [Vigilant Cyber Security](https://www.vigilantdefense.com), identified secu\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44768",
-    "created_at": "2026-03-16T18:29:52Z",
-    "deletions": 9,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45010",
+    "created_at": "2026-03-26T06:19:29Z",
+    "deletions": 71,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44768/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44768",
+    "files_url": "https://github.com/huggingface/transformers/pull/45010/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45010",
     "labels": [],
     "merged": false,
-    "number": 44768,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026",
-    "updated_at": "2026-03-24T19:23:13Z"
+    "number": 45010,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: pin 69 unpinned action(s),extract 2 unsafe expression(s) to env vars",
+    "updated_at": "2026-03-27T22:20:57Z"
   },
   {
-    "additions": 11,
+    "additions": 330,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This patch improves the docstring checker implementation (redundant AST walks) and adds cache. For the AST calls, 2.3x speedup check_docstrings.py --check_all on my M1: - before : 29.3s - after: 12.6s",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44767",
-    "created_at": "2026-03-16T17:31:26Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45009",
+    "created_at": "2026-03-26T05:31:41Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44767/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44767",
+    "files_url": "https://github.com/huggingface/transformers/pull/45009/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45009",
     "labels": [],
     "merged": true,
-    "number": 44767,
-    "review_comments_count": 6,
+    "number": 45009,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs",
-    "updated_at": "2026-03-16T19:02:31Z"
+    "title": "refactor: speed up docstring checker",
+    "updated_at": "2026-03-27T07:21:11Z"
   },
   {
-    "additions": 26,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`",
-    "changed_files": 2,
+    "additions": 380,
+    "author": "Krishnachaitanyakc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fix type annotation bugs across config classes that cause `@strict` validation errors from `huggingface_hub`. ### Bool fields mistyped as `int` (22 fixes) Fields with boolean defaults (`True`/`False`) were annotated as `int` ins\u2026",
+    "changed_files": 198,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44766",
-    "created_at": "2026-03-16T17:30:23Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45007",
+    "created_at": "2026-03-25T23:12:53Z",
+    "deletions": 380,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44766/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44766",
+    "files_url": "https://github.com/huggingface/transformers/pull/45007/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45007",
     "labels": [],
     "merged": true,
-    "number": 44766,
-    "review_comments_count": 0,
+    "number": 45007,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "support xxxFast alias in v5 tokenizers",
-    "updated_at": "2026-03-18T13:40:05Z"
+    "title": "fix: correct type annotations across config classes for @strict validation",
+    "updated_at": "2026-04-01T17:50:09Z"
   },
   {
-    "additions": 19,
-    "author": "harshaljanjani",
+    "additions": 3,
+    "author": "Krishnachaitanyakc",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #44869 Adds a bounds check in `_split_tokens_on_unicode()` in `tokenization_whisper.py` to handle trailing Unicode replacement characters (U+FFFD) at the end of decoded token streams without crashing with `IndexError`. ##\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44765",
-    "created_at": "2026-03-16T17:26:22Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45006",
+    "created_at": "2026-03-25T23:03:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44765/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44765",
+    "files_url": "https://github.com/huggingface/transformers/pull/45006/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45006",
     "labels": [],
-    "merged": true,
-    "number": 44765,
+    "merged": false,
+    "number": 45006,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main",
-    "updated_at": "2026-03-20T13:55:55Z"
+    "state": "open",
+    "title": "fix: prevent IndexError in Whisper timestamp decode on trailing replacement char",
+    "updated_at": "2026-04-06T01:03:09Z"
   },
   {
-    "additions": 12,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher",
-    "changed_files": 3,
+    "additions": 10,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR (grouped them together since they share related root causes OR the code changes were extremely minimal and didn't warrant separate PRs): \u2192 **Phi-3**\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44764",
-    "created_at": "2026-03-16T17:15:40Z",
-    "deletions": 4,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45004",
+    "created_at": "2026-03-25T19:58:57Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44764/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44764",
+    "files_url": "https://github.com/huggingface/transformers/pull/45004/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45004",
     "labels": [],
     "merged": true,
-    "number": 44764,
-    "review_comments_count": 2,
+    "number": 45004,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "fix: sig lip import",
-    "updated_at": "2026-03-16T17:38:41Z"
+    "title": "fix(testing): Fix Parakeet, Evolla, Pi0, and Phi-3 test failures on main CI",
+    "updated_at": "2026-03-27T15:06:52Z"
   },
   {
-    "additions": 17,
-    "author": "xenova",
+    "additions": 1,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026",
-    "changed_files": 3,
+    "body_excerpt": "`None` is a valid value that can be used to disable chunked attention in `DynamicCache` and Flex Attention. hf.co/morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct is an example of a checkpoint which does this.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44763",
-    "created_at": "2026-03-16T17:04:36Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45002",
+    "created_at": "2026-03-25T17:40:14Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44763/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44763",
+    "files_url": "https://github.com/huggingface/transformers/pull/45002/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45002",
     "labels": [],
-    "merged": false,
-    "number": 44763,
+    "merged": true,
+    "number": 45002,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[nemotron_h] Add support for MLP mixers",
-    "updated_at": "2026-03-18T11:21:44Z"
+    "state": "closed",
+    "title": "Fix type hint for `attention_chunk_size` in `Llama4TextConfig`",
+    "updated_at": "2026-03-25T20:42:11Z"
   },
   {
-    "additions": 4,
-    "author": "BillionClaw",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026",
+    "additions": 20,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? For [torch>=2.10.0](https://docs.pytorch.org/docs/2.10/generated/torch.nn.functional.grouped_mm.html#torch-nn-functional-grouped-mm), the minimum CUDA compute capability requirement for `torch.nn.functional.grouped_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44762",
-    "created_at": "2026-03-16T16:17:54Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45001",
+    "created_at": "2026-03-25T17:00:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44762/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44762",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/45001/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45001",
+    "labels": [],
     "merged": false,
-    "number": 44762,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation",
-    "updated_at": "2026-03-18T15:16:14Z"
+    "number": 45001,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "Add cuda compatibility check for using `grouped_mm`",
+    "updated_at": "2026-03-26T16:10:00Z"
   },
   {
-    "additions": 152,
-    "author": "tarekziade",
+    "additions": 22,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per title, after https://github.com/huggingface/transformers/pull/44976 users will be seeing a `missing_weights - lm_head not found` error even though the model doesn't use an lm head On the way also deleted unne\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44761",
-    "created_at": "2026-03-16T16:05:03Z",
-    "deletions": 7,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/45000",
+    "created_at": "2026-03-25T16:28:55Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44761/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44761",
+    "files_url": "https://github.com/huggingface/transformers/pull/45000/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/45000",
     "labels": [],
     "merged": true,
-    "number": 44761,
-    "review_comments_count": 7,
+    "number": 45000,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "model-linter: Added rule 10",
-    "updated_at": "2026-03-17T08:52:19Z"
+    "title": "Embedding VLMs don't need a head",
+    "updated_at": "2026-03-27T09:57:18Z"
   },
   {
-    "additions": 2090,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
+    "additions": 2450,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44760",
-    "created_at": "2026-03-16T15:54:11Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44760/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44760",
-    "labels": [
-      "New model"
-    ],
-    "merged": true,
-    "number": 44760,
-    "review_comments_count": 8,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44999",
+    "created_at": "2026-03-25T16:21:37Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44999/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44999",
+    "labels": [],
+    "merged": false,
+    "number": 44999,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add Mistral 4",
-    "updated_at": "2026-03-20T10:44:48Z"
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-30T23:21:49Z"
   },
   {
-    "additions": 419,
-    "author": "Cyrilvallez",
+    "additions": 1179,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of many related PR, last one in time being https://github.com/huggingface/transformers/pull/44602. This PR completes all the models that may need non-trivial treatment. Only about 30-40 models still have m\u2026",
-    "changed_files": 42,
+    "body_excerpt": "## Summary - Auto-generated modular integration for `sarvam` - `modular_sarvam.py` written by Claude Opus 4.6 guided by `modular_model_detector.py` - `modeling_sarvam.py` regenerated from modular via `modular_model_converter.py` ## Test pl\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44759",
-    "created_at": "2026-03-16T15:38:13Z",
-    "deletions": 983,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44759/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44759",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44997",
+    "created_at": "2026-03-25T14:23:13Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44997/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44997",
     "labels": [],
-    "merged": true,
-    "number": 44759,
-    "review_comments_count": 26,
+    "merged": false,
+    "number": 44997,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove cache_position in more models (3)",
-    "updated_at": "2026-03-18T13:09:37Z"
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-25T14:35:45Z"
   },
   {
-    "additions": 825,
-    "author": "LysandreJik",
+    "additions": 286,
+    "author": "3outeille",
     "author_association": "MEMBER",
-    "body_excerpt": "Provides a nicer feedback when `transformers chat` loads a model, instead of hanging https://github.com/user-attachments/assets/8f68f914-b702-4430-b97f-e8cc25326b70 <p>Adds a <code>POST /load_model</code> endpoint to <code>transformers ser\u2026",
-    "changed_files": 7,
+    "body_excerpt": "- Introduce `DistributedConfig` - `DistributedConfig(tp_size=2, fsdp_size=2) # plans default to \"auto\"` replaces passing separate `tp_plan, tp_size, fsdp_plan kwargs`. Sizes auto-fill (specify one, the other defaults to 1). Plans default t\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44758",
-    "created_at": "2026-03-16T15:02:15Z",
-    "deletions": 63,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44996",
+    "created_at": "2026-03-25T14:20:25Z",
+    "deletions": 283,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44758/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44758",
+    "files_url": "https://github.com/huggingface/transformers/pull/44996/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44996",
     "labels": [],
     "merged": true,
-    "number": 44758,
-    "review_comments_count": 20,
+    "number": 44996,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Propagate the model loading from transformers serve to chat",
-    "updated_at": "2026-03-19T17:20:03Z"
+    "title": " from_pretrained distributed refactor (FSDP2 + TP)",
+    "updated_at": "2026-03-26T15:32:27Z"
   },
   {
-    "additions": 1,
-    "author": "dacorvo",
+    "additions": 3639,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary - `_valid_auto_compile_criteria()` gates auto-compilation on `device.type in [\"cuda\", \"xpu\"]`, excluding Neuron devices. This means `torch.compile` never triggers automatically on Neuron even when `StaticCache` is used (which se\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44757",
-    "created_at": "2026-03-16T14:54:38Z",
-    "deletions": 1,
+    "body_excerpt": null,
+    "changed_files": 8,
+    "cluster_id": "cluster-43098-11",
+    "cluster_ids": [
+      "cluster-43098-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44994",
+    "created_at": "2026-03-25T14:02:50Z",
+    "deletions": 242,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44757/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44757",
+    "files_url": "https://github.com/huggingface/transformers/pull/44994/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44994",
     "labels": [],
     "merged": false,
-    "number": 44757,
+    "number": 44994,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add Neuron to auto-compile hardware list",
-    "updated_at": "2026-03-16T15:05:00Z"
+    "state": "closed",
+    "title": "Add sarvam model",
+    "updated_at": "2026-03-25T14:04:38Z"
   },
   {
-    "additions": 4,
-    "author": "zucchini-nlp",
+    "additions": 583,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? `make check-repo` can be quite slow, this patch adds file-level cache to speed up checks. We get up to a 27x speedup - cold cache : 46s - warm cache : 1.6s",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44755",
-    "created_at": "2026-03-16T14:08:34Z",
-    "deletions": 148,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44992",
+    "created_at": "2026-03-25T11:40:46Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44755/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44755",
+    "files_url": "https://github.com/huggingface/transformers/pull/44992/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44992",
     "labels": [],
-    "merged": false,
-    "number": 44755,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44992,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Dont merge, testing smth",
-    "updated_at": "2026-03-18T10:09:15Z"
+    "title": "refactoring: speedup static checks with disk cache",
+    "updated_at": "2026-03-31T12:34:20Z"
   },
   {
-    "additions": 20,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44737 - Added `device=self.device` to all four `torch.arange()` calls in `XLNetModel.relative_positional_encoding()` so that intermediate tensors are created directly on the model's device instead of always on CPU. - With\u2026",
-    "changed_files": 2,
+    "additions": 8,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - BC for check model inputs",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44753",
-    "created_at": "2026-03-16T14:01:08Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44990",
+    "created_at": "2026-03-25T10:26:20Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44753/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44753",
+    "files_url": "https://github.com/huggingface/transformers/pull/44990/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44990",
     "labels": [],
-    "merged": false,
-    "number": 44753,
+    "merged": true,
+    "number": 44990,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add device= to torch.arange in XLNet relative_positional_encoding",
-    "updated_at": "2026-03-18T13:28:40Z"
+    "title": "More small vllm fixes",
+    "updated_at": "2026-03-25T13:05:44Z"
   },
   {
-    "additions": 100,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR fix? The `flatten()` function in `tokenization_utils_base.py` had a bug where it was checking `arr[0]` instead of `sub_arr` when determining if an element should be recursively flattened. ### Bug Details - **File**: `s\u2026",
-    "changed_files": 6,
+    "additions": 1,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "- Steps breakdown: - FSDP + TP: - https://github.com/huggingface/transformers/pull/44083 - [Request](https://github.com/huggingface/transformers/pull/44083#pullrequestreview-3975401342) to use our loading method https://github.com/huggingf\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44751",
-    "created_at": "2026-03-16T13:40:44Z",
-    "deletions": 29,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44751/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44751",
-    "labels": [
-      "Code agent slop"
-    ],
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44989",
+    "created_at": "2026-03-25T09:10:02Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44989/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44989",
+    "labels": [],
     "merged": false,
-    "number": 44751,
+    "number": 44989,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Correct variable reference in flatten() function",
-    "updated_at": "2026-03-18T13:16:12Z"
+    "state": "open",
+    "title": "\ud83d\udea8 Distributed training API",
+    "updated_at": "2026-03-26T18:13:54Z"
   },
   {
-    "additions": 1,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds `apply_yarn_scaling` as an optional key for yarn repo. This was requested as part of a vLLM PR https://github.com/vllm-project/vllm/pull/37104 that seeks to silence some rope issues when converting Mist\u2026",
-    "changed_files": 1,
+    "additions": 659,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds Rule 15 ``` if _tied_weights_keys is present and non-empty in modeling -> Config MUST contain the tie_word_embeddings field ```",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44747",
-    "created_at": "2026-03-16T10:32:54Z",
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44988",
+    "created_at": "2026-03-25T07:08:20Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44747/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44747",
+    "files_url": "https://github.com/huggingface/transformers/pull/44988/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44988",
     "labels": [],
     "merged": false,
-    "number": 44747,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add apply_yarn_scaling as optional key to yarn",
-    "updated_at": "2026-03-16T12:48:08Z"
+    "number": 44988,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "typing: rule 15 - checks for tie_word_embeddings presence",
+    "updated_at": "2026-04-01T07:56:15Z"
   },
   {
-    "additions": 202,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed Issue #44737: XLNet relative_positional_encoding function missing device parameter in torch.arange calls.",
-    "changed_files": 11,
+    "additions": 0,
+    "author": "Krishnachaitanyakc",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44855 On Python 3.13, placing a `# Copied from` comment between `@torch.jit.script` and the function definition causes an `IndentationError`. This happens because `torch.jit.script` calls `inspect.getsource()` followed by\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44745",
-    "created_at": "2026-03-16T09:39:30Z",
-    "deletions": 33,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44986",
+    "created_at": "2026-03-25T03:18:31Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44745/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44745",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44745,
+    "files_url": "https://github.com/huggingface/transformers/pull/44986/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44986",
+    "labels": [],
+    "merged": true,
+    "number": 44986,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add device parameter to torch.arange calls in XLNet",
-    "updated_at": "2026-03-18T13:16:43Z"
+    "title": "fix: remove Copied from comments between @torch.jit.script and def for Python 3.13 compat",
+    "updated_at": "2026-03-25T13:39:54Z"
   },
   {
-    "additions": 35,
-    "author": "kaixuanliu",
+    "additions": 2,
+    "author": "Krishnachaitanyakc",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
+    "body_excerpt": "## Summary Fixes #44913 When creating a `GPTNeoXConfig` (or `GPTNeoXJapaneseConfig`) with a non-default `rotary_pct`, the value is lost after a `save_pretrained` / `from_pretrained` round-trip. This happens because `convert_rope_params_to_\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44739",
-    "created_at": "2026-03-16T07:15:33Z",
-    "deletions": 7,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44985",
+    "created_at": "2026-03-25T02:15:04Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44739/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44739",
+    "files_url": "https://github.com/huggingface/transformers/pull/44985/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44985",
     "labels": [],
-    "merged": false,
-    "number": 44739,
-    "review_comments_count": 12,
-    "state": "open",
-    "title": "fix series of failed test case for janus model",
-    "updated_at": "2026-03-25T20:26:39Z"
-  },
-  {
-    "additions": 6,
-    "author": "yunhaoli24",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44031 ## The Problem The condition for calling `_patch_mistral_regex` was too broad (`vocab_size > 100000`), causing non-Mistral models like Qwen, LLaMA, BGE-Reranker to show incorrect regex pattern warnings\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44736",
-    "created_at": "2026-03-16T06:00:47Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44736/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44736",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44736,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44985,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: resolve false-positive regex warning for non-mistral models",
-    "updated_at": "2026-03-18T15:08:59Z"
+    "title": "fix: preserve rotary_pct across save/load cycle in GPTNeoX configs",
+    "updated_at": "2026-03-27T09:19:46Z"
   },
   {
-    "additions": 1,
-    "author": "mango766",
+    "additions": 2,
+    "author": "Butanium",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `transformers serve` when the `/v1/responses` streaming endpoint attempts to reuse a KV cache from a previous request in the same conversation session. ### The bug In `generate_response`, `inputs`\u2026",
+    "body_excerpt": "## What does this PR do? `maybe_autocast` calls `torch.is_autocast_enabled(device_type)` which raises a `RuntimeError` when `device_type` is `\"meta\"`: ``` RuntimeError: unknown device type for autocast in get_autocast_dispatch_key_from_dev\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44735",
-    "created_at": "2026-03-16T04:09:32Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44984",
+    "created_at": "2026-03-25T01:39:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44735/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44735",
+    "files_url": "https://github.com/huggingface/transformers/pull/44984/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44984",
     "labels": [],
     "merged": true,
-    "number": 44735,
+    "number": 44984,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tensor indexing crash in serve generate_response KV cache continuation",
-    "updated_at": "2026-03-16T15:27:59Z"
+    "title": "Fix `maybe_autocast` crashing on meta device tensors",
+    "updated_at": "2026-03-25T17:45:03Z"
   },
   {
-    "additions": 28,
-    "author": "kaixuanliu",
+    "additions": 26,
+    "author": "Hyungkeun-Park-Nota",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil, pls help review, thx!",
-    "changed_files": 3,
+    "body_excerpt": "## What does this PR do? Fixes `save_pretrained()` for models loaded with `dequantize=True`. `save_pretrained` calls `reverse_op` on all weight conversion operations from loading. Dequantize ops (`Mxfp4Dequantize`, `Fp8Dequantize`, `MetalD\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44733",
-    "created_at": "2026-03-16T02:55:54Z",
-    "deletions": 10,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44983",
+    "created_at": "2026-03-25T01:19:59Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44733/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44733",
+    "files_url": "https://github.com/huggingface/transformers/pull/44983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44983",
     "labels": [],
     "merged": true,
-    "number": 44733,
-    "review_comments_count": 0,
+    "number": 44983,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix bug and add XPU Expectations for qwen2 and jamba tests",
-    "updated_at": "2026-03-16T11:47:16Z"
+    "title": "fix: add identity reverse_op to dequantize ops for save_pretrained",
+    "updated_at": "2026-03-27T17:21:18Z"
   },
   {
-    "additions": 1,
-    "author": "Defalt-Meh",
+    "additions": 108,
+    "author": "AkshajKashyap",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? While running SmolVLM tests I noticed this warning in the output: ``` tests/test_video_processing_common.py:57: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider convert\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Fixes #43039 ## What does this PR do? When `prediction_loss_only=True` during evaluation and `use_liger_kernel=True`, `Trainer.prediction_step` now passes `skip_logits=True` to the model forward if the forward signature supports it and lab\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44731",
-    "created_at": "2026-03-15T23:26:31Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44981",
+    "created_at": "2026-03-25T00:38:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44731/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44731",
+    "files_url": "https://github.com/huggingface/transformers/pull/44981/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44981",
     "labels": [],
     "merged": false,
-    "number": 44731,
+    "number": 44981,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[Tests] Fix slow video tensor creation from list of numpy arrays in SmolVLM",
-    "updated_at": "2026-03-15T23:26:31Z"
+    "title": "Trainer: set skip_logits for loss-only eval when liger enabled",
+    "updated_at": "2026-04-03T03:22:23Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? It's unclear why the config class has `model_type = \"mlcd_vision_model\"` but the model on the hub has \"model_type\": \"mlcd\". This leads to the following failures (load from hub --> save locally --> local locally) ```\u2026",
-    "changed_files": 2,
+    "additions": 6,
+    "author": "kallewoof",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Pre-patch unnecessarily breaks merging a LoRA adapter with a model using CUDA_VISIBLE_DEVICES= e.g. when VRAM is insufficient. It also breaks non-cuda machine operations (such as merging). # What does this PR do? This PR un-breaks `CUDA_VI\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44730",
-    "created_at": "2026-03-15T20:44:32Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44980",
+    "created_at": "2026-03-24T23:50:07Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44730/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44730",
+    "files_url": "https://github.com/huggingface/transformers/pull/44980/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44980",
     "labels": [],
-    "merged": true,
-    "number": 44730,
-    "review_comments_count": 6,
+    "merged": false,
+    "number": 44980,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `mlcd` auto config/model/mapping issues",
-    "updated_at": "2026-03-16T12:12:30Z"
+    "title": "bug-fix: do not assume torch.cuda is available when setting up norm values, even if flash linear attention is available",
+    "updated_at": "2026-03-27T13:25:18Z"
   },
   {
-    "additions": 214,
-    "author": "xenova",
+    "additions": 492,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026",
-    "changed_files": 58,
+    "body_excerpt": "# What does this PR do? Introduces `src/transformers/module_fusion.py`, a utility for fusing adjacent submodules in a model into a single FusedModule that executes them as a chain in one forward pass. The key components are: - `RegistryCol\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44729",
-    "created_at": "2026-03-15T20:29:38Z",
-    "deletions": 225,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44979",
+    "created_at": "2026-03-24T22:33:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44729/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44729",
+    "files_url": "https://github.com/huggingface/transformers/pull/44979/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44979",
     "labels": [],
     "merged": false,
-    "number": 44729,
+    "number": 44979,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Avoid floating point math for ceil operations",
-    "updated_at": "2026-03-15T20:49:34Z"
+    "title": "Module Fusion API",
+    "updated_at": "2026-03-30T19:32:58Z"
   },
   {
-    "additions": 88,
-    "author": "ajmeese7",
+    "additions": 4,
+    "author": "cjkindel",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? `_can_set_attn_implementation` and `_can_set_experts_implementation` both do a direct subscript lookup into `sys.modules`: ```python class_module = sys.modules[cls.__module__] ``` If the module is not registered und\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44728",
-    "created_at": "2026-03-15T19:56:44Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44978",
+    "created_at": "2026-03-24T21:01:11Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44728/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44728",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44978/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44978",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44728,
+    "number": 44978,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix float16 memory leak during 4-bit quantized model loading",
-    "updated_at": "2026-03-16T20:53:54Z"
+    "title": "fix: handle absent sys.modules entry in modeling_utils",
+    "updated_at": "2026-03-26T12:25:31Z"
   },
   {
-    "additions": 202,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.",
-    "changed_files": 11,
+    "additions": 2,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "- Adds a type hint to `ModernVBertForMaskedLM.__init__` - Removes `tie_word_embeddings` from `Qwen2VLTextConfig` (and therefore also `Qwen2_5_VLTextConfig`) because it's not valid for these models - Remove hack from `ColQwen2Config` (and t\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44727",
-    "created_at": "2026-03-15T19:41:24Z",
-    "deletions": 33,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44976",
+    "created_at": "2026-03-24T19:26:33Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44727/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44727",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44727,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44976/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44976",
+    "labels": [],
+    "merged": true,
+    "number": 44976,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file",
-    "updated_at": "2026-03-18T13:15:46Z"
+    "title": "Fix tie_word_embedding issues with `Qwen2VL`",
+    "updated_at": "2026-03-24T20:55:15Z"
   },
   {
-    "additions": 198,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).",
-    "changed_files": 10,
+    "additions": 6971,
+    "author": "philippguevorguian",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44725",
-    "created_at": "2026-03-15T17:41:18Z",
-    "deletions": 29,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44975",
+    "created_at": "2026-03-24T17:12:31Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44725/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44725",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44975/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44975",
+    "labels": [],
     "merged": false,
-    "number": 44725,
+    "number": 44975,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace bare except with Exception in Fuyu image processing",
-    "updated_at": "2026-03-18T13:16:22Z"
+    "title": "fix: rebase main; clean config reads, ImageProcessor backend, misc cleanup",
+    "updated_at": "2026-03-24T17:13:42Z"
   },
   {
-    "additions": 6,
-    "author": "ydshieh",
+    "additions": 1084,
+    "author": "3outeille",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? TO be explained.",
-    "changed_files": 5,
+    "body_excerpt": "TODO: - Saving seems to take a bit of time tho. Need investigation - Need to check if it works in 1D (FSDP or TP)and 2D (FSDP + TP). Running the script from https://github.com/huggingface/transformers/pull/44996 ``` (env_pr-44974-fsdp-core\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44724",
-    "created_at": "2026-03-15T17:14:12Z",
-    "deletions": 5,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44724/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44724",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44974",
+    "created_at": "2026-03-24T16:13:25Z",
+    "deletions": 332,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44974/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44974",
     "labels": [],
     "merged": false,
-    "number": 44724,
-    "review_comments_count": 1,
+    "number": 44974,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Fix some missing / incorrect entries in auto files",
-    "updated_at": "2026-03-16T09:59:56Z"
+    "title": "Refactor core_model_loading to support FSDP shard-on-read loading",
+    "updated_at": "2026-03-26T18:04:53Z"
   },
   {
-    "additions": 12,
-    "author": "aashirpersonal",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026",
-    "changed_files": 2,
+    "additions": 22,
+    "author": "andylizf",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds `.item()` to `max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max()` in all vision attention modules that pass this value to `flash_attn_varlen_func`. ### Context On **released versions** (e.g. 4.52.4), using\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44723",
-    "created_at": "2026-03-15T16:52:03Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44723/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44723",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44723,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
-    "updated_at": "2026-03-18T15:05:52Z"
-  },
-  {
-    "additions": 38,
-    "author": "chandan11248",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44722",
-    "created_at": "2026-03-15T15:33:25Z",
-    "deletions": 110,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44973",
+    "created_at": "2026-03-24T15:42:32Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44722/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44722",
+    "files_url": "https://github.com/huggingface/transformers/pull/44973/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44973",
     "labels": [],
     "merged": false,
-    "number": 44722,
+    "number": 44973,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Refactor gptj output tracing to use standardized decorators",
-    "updated_at": "2026-03-19T18:12:59Z"
+    "title": "Fix max_seqlen type in vision attention for torch.compile + FA2",
+    "updated_at": "2026-03-25T14:12:50Z"
   },
   {
-    "additions": 4,
-    "author": "rsmed31",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title. Updating Gemma3/Gemma3n expectations.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44718",
-    "created_at": "2026-03-14T23:57:14Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44972",
+    "created_at": "2026-03-24T15:11:50Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44718/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44718",
+    "files_url": "https://github.com/huggingface/transformers/pull/44972/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44972",
     "labels": [],
-    "merged": false,
-    "number": 44718,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44972,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
-    "updated_at": "2026-03-15T17:58:58Z"
+    "title": "[AMD CI] Gemma3/Gemma3n Expectations",
+    "updated_at": "2026-03-24T16:30:03Z"
   },
   {
-    "additions": 15,
-    "author": "ydshieh",
+    "additions": 0,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? Removed the tokenizer_class attr was never there to begin with, and kwargs are now supported. This was failing some test on vllm ci. Fixes https://buildkite.com/vllm/ci/builds/57601/steps/canvas?sid=019d1aec-aa5a-41\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44715",
-    "created_at": "2026-03-14T21:11:52Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44971",
+    "created_at": "2026-03-24T14:59:36Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44715/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44715",
+    "files_url": "https://github.com/huggingface/transformers/pull/44971/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44971",
     "labels": [],
     "merged": true,
-    "number": 44715,
-    "review_comments_count": 0,
+    "number": 44971,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix missing / incorrect `config` class in some model class definitions",
-    "updated_at": "2026-03-15T11:19:51Z"
+    "title": "[ `vllm x v5`] nit",
+    "updated_at": "2026-03-24T17:40:05Z"
   },
   {
-    "additions": 181,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026",
-    "changed_files": 8,
+    "additions": 20,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44714",
-    "created_at": "2026-03-14T20:42:46Z",
-    "deletions": 26,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44970",
+    "created_at": "2026-03-24T13:49:21Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44714/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44714",
+    "files_url": "https://github.com/huggingface/transformers/pull/44970/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44970",
     "labels": [],
-    "merged": false,
-    "number": 44714,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44970,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: propagate num_labels to text_config for Qwen models",
-    "updated_at": "2026-03-18T12:56:27Z"
-  },
-  {
-    "additions": 15,
-    "author": "kulkarni-rohan",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44713",
-    "created_at": "2026-03-14T20:20:14Z",
-    "deletions": 28,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44713/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44713",
-    "labels": [],
-    "merged": false,
-    "number": 44713,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[ColQwen2] Refactor output tracing (issue #43979)",
-    "updated_at": "2026-03-14T20:21:24Z"
+    "title": "Fix CPU 16 bytes alignment issue using equivalent fallback",
+    "updated_at": "2026-03-25T09:01:03Z"
   },
   {
-    "additions": 2,
-    "author": "ydshieh",
+    "additions": 4,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Extends the CI so we can use Make and read toml files",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44712",
-    "created_at": "2026-03-14T20:18:01Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44968",
+    "created_at": "2026-03-24T11:43:24Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44712/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44712",
+    "files_url": "https://github.com/huggingface/transformers/pull/44968/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44968",
     "labels": [],
-    "merged": true,
-    "number": 44712,
+    "merged": false,
+    "number": 44968,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update Nvidia CI docker file to use torch 2.10",
-    "updated_at": "2026-03-14T20:29:04Z"
+    "title": "Added Make to the docker and `tomli` to `.[quality]`",
+    "updated_at": "2026-03-24T15:06:29Z"
   },
   {
-    "additions": 339,
-    "author": "anuq",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026",
-    "changed_files": 4,
+    "additions": 87,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix: FA kernel launches currently are not thread-safe (nogil) in multi-gpu env. This simple patch fixes the issue. ```py # Set the correct CUDA context before launching the FlashAttention kernel. with torch.cuda.dev\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44711",
-    "created_at": "2026-03-14T19:21:21Z",
-    "deletions": 205,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44711/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44711",
-    "labels": [
-      "Code agent slop"
-    ],
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44967",
+    "created_at": "2026-03-24T11:33:45Z",
+    "deletions": 84,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44967/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44967",
+    "labels": [],
     "merged": false,
-    "number": 44711,
+    "number": 44967,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`",
-    "updated_at": "2026-03-20T13:36:58Z"
+    "state": "open",
+    "title": "[WIP] Fix FA kernel launch needs correct cuda device ctx in multi-gpu env",
+    "updated_at": "2026-03-26T11:10:48Z"
   },
   {
-    "additions": 12,
-    "author": "he-yufeng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026",
+    "additions": 8,
+    "author": "pramilajangid",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44964 ## Summary This PR restores backward compatibility for `CommonKwargs` in `transformers.processing_utils`, which is still referenced by some remote processor implementations. ## Problem After the typed-dict cleanup (commit `533\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44710",
-    "created_at": "2026-03-14T18:33:53Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44966",
+    "created_at": "2026-03-24T11:06:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44710/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44710",
+    "files_url": "https://github.com/huggingface/transformers/pull/44966/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44966",
     "labels": [],
-    "merged": true,
-    "number": 44710,
+    "merged": false,
+    "number": 44966,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs",
-    "updated_at": "2026-03-25T18:13:14Z"
+    "title": "Fix backward compatibility for CommonKwargs in processing_utils (brea\u2026",
+    "updated_at": "2026-03-24T12:48:44Z"
   },
   {
-    "additions": 6778,
-    "author": "LucasMa2025",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026",
-    "changed_files": 15,
+    "additions": 37,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44708",
-    "created_at": "2026-03-14T17:13:34Z",
-    "deletions": 7,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44708/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44708",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44965",
+    "created_at": "2026-03-24T10:59:31Z",
+    "deletions": 32,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44965/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44965",
     "labels": [],
     "merged": false,
-    "number": 44708,
+    "number": 44965,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add Configurable Generation Scheduler and State Machine for `generate()`",
-    "updated_at": "2026-03-14T19:19:11Z"
+    "state": "open",
+    "title": "try",
+    "updated_at": "2026-03-24T11:19:27Z"
   },
   {
     "additions": 3,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026",
+    "author": "josh-kean",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an import in src/transformers/video_processing_utils.py that was causing the main build to fail Fixes # 44933 ## Code Agent Policy The Transformers repo is currently being overwhelmed by a large number of PRs\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44707",
-    "created_at": "2026-03-14T17:12:16Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44958",
+    "created_at": "2026-03-23T20:07:09Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44707/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44707",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44958/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44958",
+    "labels": [],
     "merged": false,
-    "number": 44707,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add position_ids to MptForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:39:36Z"
+    "number": 44958,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "fixed import error with PILImageResampling",
+    "updated_at": "2026-03-24T13:53:00Z"
   },
   {
-    "additions": 3,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026",
-    "changed_files": 1,
+    "additions": 1261,
+    "author": "bigshanedogg",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds native Transformers support for **[HyperCLOVA X SEED Think 14B](https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B)**, a 14.74B-parameter Korean reasoning LLM developed by NAVER Cloud. - relate\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44706",
-    "created_at": "2026-03-14T17:09:11Z",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44956",
+    "created_at": "2026-03-23T19:34:30Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44706/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44706",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44956/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44956",
+    "labels": [],
     "merged": false,
-    "number": 44706,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add position_ids to BloomForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:39:51Z"
+    "number": 44956,
+    "review_comments_count": 20,
+    "state": "open",
+    "title": "Add HyperCLOVAX SEED Think 14B",
+    "updated_at": "2026-04-04T02:02:47Z"
   },
   {
-    "additions": 14,
-    "author": "saivedant169",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026",
+    "additions": 0,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "removes outdated qa pipeline reference",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44705",
-    "created_at": "2026-03-14T16:48:06Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44954",
+    "created_at": "2026-03-23T17:20:37Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44705/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44705",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44954/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44954",
+    "labels": [],
     "merged": false,
-    "number": 44705,
+    "number": 44954,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add position_ids to RoFormerForCausalLM forward pass",
-    "updated_at": "2026-03-18T13:40:05Z"
+    "state": "open",
+    "title": "[docs] pipeline cleanup",
+    "updated_at": "2026-03-23T17:30:10Z"
   },
   {
-    "additions": 26,
-    "author": "vasqu",
+    "additions": 861,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Decouples `kwargs` manipulation from hub's strict decorator, and ensures that all subclasses of a `PreTrainedConfig` accept any kwargs which is what we supported prev. Not all remote code has `@strict` or has an `__\u2026",
+    "changed_files": 536,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44703",
-    "created_at": "2026-03-14T14:46:02Z",
-    "deletions": 10,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44953",
+    "created_at": "2026-03-23T17:13:39Z",
+    "deletions": 824,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44703/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44703",
+    "files_url": "https://github.com/huggingface/transformers/pull/44953/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44953",
     "labels": [],
     "merged": true,
-    "number": 44703,
-    "review_comments_count": 1,
+    "number": 44953,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`FA`] Fix fa detection",
-    "updated_at": "2026-03-14T17:19:07Z"
+    "title": "Config kwargs",
+    "updated_at": "2026-03-24T14:14:46Z"
   },
   {
-    "additions": 148,
-    "author": "LincolnBurrows2017",
+    "additions": 10,
+    "author": "Jess-Co-Del",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026",
-    "changed_files": 8,
+    "body_excerpt": "# What does this PR do? Fixes the non existence of output dictionary change, when parameter output_hidden_states=True is passed to models like CLIP or SigLip. This is especially pertinent for the vision model config. According to #42759 no\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44702",
-    "created_at": "2026-03-14T14:41:15Z",
-    "deletions": 25,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44952",
+    "created_at": "2026-03-23T17:02:50Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44702/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44702",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44952/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44952",
+    "labels": [],
     "merged": false,
-    "number": 44702,
+    "number": 44952,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig",
-    "updated_at": "2026-03-18T13:00:12Z"
+    "state": "open",
+    "title": "Fix: Add correct return behaviour when output_hidden_states=True for CLIP and SIGLIP vision models",
+    "updated_at": "2026-03-24T11:19:35Z"
   },
   {
-    "additions": 219,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026",
-    "changed_files": 52,
+    "additions": 113,
+    "author": "hemantmm",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This pull request adds routing replay functionality for mixture-of-experts (MoE) model types by giving users the option to override router probabilities while processing a forward pass through their models. <!-- Con\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44699",
-    "created_at": "2026-03-14T11:44:24Z",
-    "deletions": 148,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44951",
+    "created_at": "2026-03-23T16:29:46Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44699/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44699",
+    "files_url": "https://github.com/huggingface/transformers/pull/44951/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44951",
     "labels": [],
-    "merged": true,
-    "number": 44699,
+    "merged": false,
+    "number": 44951,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix several based models' pipeline parallel support",
-    "updated_at": "2026-03-20T13:53:27Z"
+    "state": "open",
+    "title": "feat: Add router_logits override to enable Routing Replay for MoE models",
+    "updated_at": "2026-03-26T12:36:20Z"
   },
   {
-    "additions": 1,
-    "author": "hmellor",
+    "additions": 1346,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. This PR finally makes mamba layer caches first class citizen, and adds native support for them. It supports the following layers combinations: - all mamba layers - alternating attention layer/mamba\u2026",
+    "changed_files": 64,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44698",
-    "created_at": "2026-03-14T11:18:54Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44950",
+    "created_at": "2026-03-23T16:25:13Z",
+    "deletions": 4113,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44698/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44698",
+    "files_url": "https://github.com/huggingface/transformers/pull/44950/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44950",
     "labels": [],
     "merged": true,
-    "number": 44698,
-    "review_comments_count": 0,
+    "number": 44950,
+    "review_comments_count": 48,
     "state": "closed",
-    "title": "Fix `set_encoder`",
-    "updated_at": "2026-03-14T13:42:00Z"
+    "title": "\ud83d\udea8 [Cache] Native mamba & hybrid cache",
+    "updated_at": "2026-03-31T13:09:44Z"
   },
   {
-    "additions": 75,
-    "author": "LincolnBurrows2017",
+    "additions": 80,
+    "author": "Charly21r",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Fixes #44936 This PR fixes an issue with `NotebookProgressCallback` in the `Trainer` where calling evaluate() before or after training would crash due to the training tracker being `None`. The callback now properly\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44697",
-    "created_at": "2026-03-14T10:44:12Z",
-    "deletions": 25,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44949",
+    "created_at": "2026-03-23T16:07:50Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44697/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44697",
+    "files_url": "https://github.com/huggingface/transformers/pull/44949/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44949",
     "labels": [],
     "merged": false,
-    "number": 44697,
-    "review_comments_count": 1,
+    "number": 44949,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix: torch_float should return float, not int",
-    "updated_at": "2026-03-17T19:29:02Z"
+    "title": "Fix: NotebookProgressCallback crash when evaluating with the Trainer",
+    "updated_at": "2026-04-03T20:05:44Z"
   },
   {
-    "additions": 19,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026",
+    "additions": 1,
+    "author": "heycorgi",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44696",
-    "created_at": "2026-03-14T09:41:07Z",
-    "deletions": 13,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44948",
+    "created_at": "2026-03-23T15:33:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44696/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44696",
+    "files_url": "https://github.com/huggingface/transformers/pull/44948/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44948",
     "labels": [],
-    "merged": true,
-    "number": 44696,
-    "review_comments_count": 5,
+    "merged": false,
+    "number": 44948,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `supports_{tp/pp}_plan`",
-    "updated_at": "2026-03-18T12:33:58Z"
+    "title": "Create aa.py",
+    "updated_at": "2026-03-23T15:34:35Z"
   },
   {
-    "additions": 4,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026",
-    "changed_files": 3,
+    "additions": 123,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The doc was generated by Claude. I deleted unnecessary repetitions and fixed a few moments to be more precise. We don't really need to merge it now so if you think the text is too LLM, feel free to take this as an i\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44695",
-    "created_at": "2026-03-14T09:05:35Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44947",
+    "created_at": "2026-03-23T13:23:04Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44695/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44695",
+    "files_url": "https://github.com/huggingface/transformers/pull/44947/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44947",
     "labels": [],
-    "merged": false,
-    "number": 44695,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "fix(testing): Fix Kyutai Speech-To-Text, LLaVA-OneVision, and LongCatFlash test failures on main CI  ",
-    "updated_at": "2026-03-23T11:51:26Z"
+    "merged": true,
+    "number": 44947,
+    "review_comments_count": 16,
+    "state": "closed",
+    "title": "Add doc page for capturing outputs",
+    "updated_at": "2026-03-26T13:08:46Z"
   },
   {
-    "additions": 143,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026",
-    "changed_files": 7,
+    "additions": 14,
+    "author": "BSchilperoort",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44693",
-    "created_at": "2026-03-14T05:43:00Z",
-    "deletions": 30,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44946",
+    "created_at": "2026-03-23T12:18:34Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44693/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44693",
+    "files_url": "https://github.com/huggingface/transformers/pull/44946/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44946",
     "labels": [],
-    "merged": false,
-    "number": 44693,
+    "merged": true,
+    "number": 44946,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Propagate num_labels to text_config in Qwen3.5",
-    "updated_at": "2026-03-18T12:56:25Z"
+    "title": "Correct docstrings for `from_pretrained` (url input deprecated)",
+    "updated_at": "2026-03-23T13:05:16Z"
   },
   {
-    "additions": 18,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026",
-    "changed_files": 2,
+    "additions": 71,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? @hmellor",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44692",
-    "created_at": "2026-03-14T04:14:38Z",
-    "deletions": 10,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44943",
+    "created_at": "2026-03-23T10:58:40Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44692/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44692",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44692,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44943/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44943",
+    "labels": [],
+    "merged": true,
+    "number": 44943,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template",
-    "updated_at": "2026-03-18T12:44:18Z"
+    "title": "Clearer type hints and fix rope validation in configs",
+    "updated_at": "2026-03-23T13:32:11Z"
   },
   {
-    "additions": 23,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026",
-    "changed_files": 2,
+    "additions": 220,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44691",
-    "created_at": "2026-03-14T04:10:54Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44691/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44691",
-    "labels": [
-      "Code agent slop"
-    ],
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44942",
+    "created_at": "2026-03-23T10:46:23Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44942/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44942",
+    "labels": [],
     "merged": false,
-    "number": 44691,
+    "number": 44942,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
-    "updated_at": "2026-03-18T12:57:19Z"
+    "state": "open",
+    "title": "Add inference time layer fusion optimisations via `PreTrainedModel.from_pretrained(fuse_layers=True)`",
+    "updated_at": "2026-03-25T16:14:19Z"
   },
   {
-    "additions": 6,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026",
-    "changed_files": 2,
+    "additions": 4,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix the failing job after #43514 (the fix is effefctive, see [here](https://github.com/huggingface/transformers/actions/runs/23433395911/job/68165255513?pr=44941)) [Update Transformers metadata](https://github.com/h\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44690",
-    "created_at": "2026-03-14T03:44:37Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44941",
+    "created_at": "2026-03-23T10:42:09Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44690/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44690",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44690,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44941/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44941",
+    "labels": [],
+    "merged": true,
+    "number": 44941,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring",
-    "updated_at": "2026-03-18T12:40:23Z"
+    "title": "Fix failing job `Update Transformers metadata` after #43514",
+    "updated_at": "2026-03-23T13:41:39Z"
   },
   {
-    "additions": 141,
-    "author": "LincolnBurrows2017",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026",
-    "changed_files": 6,
+    "additions": 138,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Model loading of same model path but 2 different threads (2 different instances) have meta device tensor issues: unloaded meta/empty embedding/lm-head when it should not be empty post model load. Cause: `tie_weight(\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44688",
-    "created_at": "2026-03-14T00:40:50Z",
-    "deletions": 23,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44940",
+    "created_at": "2026-03-23T09:55:57Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44688/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44688",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44940/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44940",
+    "labels": [],
     "merged": false,
-    "number": 44688,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Propagate num_labels to text_config in Qwen models",
-    "updated_at": "2026-03-18T12:56:41Z"
+    "number": 44940,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Fix tie_weights skipping logic is not tied to model thread scope",
+    "updated_at": "2026-04-07T02:01:50Z"
   },
   {
-    "additions": 8,
-    "author": "vxa8502",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes partial #32937 Adds explicit `position_ids` threading through GPT-Neo's attention layers to enable flash attention's packed sequence optimization. ## Context GPT-Neo uses learned absolute position embeddings (`wpe`) applied at the mo\u2026",
-    "changed_files": 1,
+    "additions": 2038,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Refactored and cleaned up model linter - separated package - one rule per module - refactored legacy checks into their own rules - simplified pattern, duplication removal",
+    "changed_files": 25,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44687",
-    "created_at": "2026-03-13T23:28:55Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44939",
+    "created_at": "2026-03-23T08:45:36Z",
+    "deletions": 1446,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44687/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44687",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44687,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44939/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44939",
+    "labels": [],
+    "merged": true,
+    "number": 44939,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Add explicit position_ids to GPT-Neo attention layers",
-    "updated_at": "2026-03-18T13:06:49Z"
+    "title": "refactor: mlinter as its own package",
+    "updated_at": "2026-03-24T07:56:15Z"
   },
   {
-    "additions": 615,
-    "author": "tejasae-afk",
-    "author_association": "NONE",
-    "body_excerpt": "During an automated code review of src/transformers/models/marian/convert_marian_to_pytorch.py, the following issue was identified. Use safe_load in convert marian to pytorch. yaml.load on untrusted input can construct arbitrary Python obj\u2026",
-    "changed_files": 80,
+    "additions": 2,
+    "author": "VanshikaSohal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes two small but impactful bugs in the BART documentation: 1. **Variable shadowing bug**: In the Pipeline example, the variable was named `pipeline` which shadows the imported `pipeline` function. Renamed to `fi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44686",
-    "created_at": "2026-03-13T21:22:07Z",
-    "deletions": 259,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44935",
+    "created_at": "2026-03-22T18:45:01Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44686/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44686",
+    "files_url": "https://github.com/huggingface/transformers/pull/44935/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44935",
     "labels": [],
-    "merged": false,
-    "number": 44686,
+    "merged": true,
+    "number": 44935,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Use safe_load in convert marian to pytorch",
-    "updated_at": "2026-03-14T03:54:31Z"
+    "title": "Fix variable shadowing in pipeline example and typo in BART docs (BERT \u2192 BART)",
+    "updated_at": "2026-03-23T14:28:04Z"
   },
   {
-    "additions": 10,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? For tiny model creation script - new added model test files still miss this argument ...",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 9,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes this failing [T5ModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524758706#step:14:1449) & this [Qwen2IntegrationTest](https://github.com/huggingface/transformer\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
+    ],
+    "cluster_role": "member",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44685",
-    "created_at": "2026-03-13T20:53:41Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44934",
+    "created_at": "2026-03-22T18:03:34Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44685/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44685",
+    "files_url": "https://github.com/huggingface/transformers/pull/44934/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44934",
     "labels": [],
     "merged": true,
-    "number": 44685,
+    "number": 44934,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix more model tester missing `parent` issue",
-    "updated_at": "2026-03-13T21:03:46Z"
+    "title": "Fix failing `T5ModelIntegrationTest`",
+    "updated_at": "2026-03-24T14:50:10Z"
   },
   {
-    "additions": 41,
-    "author": "ntenenz",
+    "additions": 1,
+    "author": "r266-tech",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026 # What does this PR do? In torch versions >= 2.9.0, it requests the lse from flex_attenetion using `AuxRequest` instead of the deprecated `return_lse`, which triggers a warning and can break tracing. Fixes #44683 ## Before submitting - [\u2026",
+    "body_excerpt": "## What does this PR do? Fixes #44908 The `get_inverse_sqrt_schedule` function accepts `timescale` and `last_epoch` parameters, but `get_scheduler` was not forwarding `scheduler_specific_kwargs` to it. This caused user-provided kwargs like\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44684",
-    "created_at": "2026-03-13T20:16:35Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44932",
+    "created_at": "2026-03-22T17:30:56Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44684/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44684",
-    "labels": [],
-    "merged": true,
-    "number": 44684,
-    "review_comments_count": 8,
+    "files_url": "https://github.com/huggingface/transformers/pull/44932/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44932",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44932,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9",
-    "updated_at": "2026-03-18T11:44:18Z"
+    "title": "Fix: Pass scheduler_specific_kwargs to inverse_sqrt scheduler",
+    "updated_at": "2026-03-23T12:44:16Z"
   },
   {
-    "additions": 301,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "r266-tech",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a v5 regression where `CamembertForMaskedLM` (and all CamemBERT masked-LM tasks) produces near-zero, near-uniform logits, making the model completely non-functional. ### Root cause In v5, `modeling_utils.get_\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44682",
-    "created_at": "2026-03-13T18:52:41Z",
-    "deletions": 73,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44931",
+    "created_at": "2026-03-22T17:28:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44682/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44682",
+    "files_url": "https://github.com/huggingface/transformers/pull/44931/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44931",
     "labels": [],
-    "merged": false,
-    "number": 44682,
+    "merged": true,
+    "number": 44931,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "transformers serve + llamacpp",
-    "updated_at": "2026-03-14T07:05:29Z"
+    "state": "closed",
+    "title": "fix(camembert): add tie_word_embeddings=True to CamembertConfig",
+    "updated_at": "2026-03-25T07:09:37Z"
   },
   {
-    "additions": 47,
-    "author": "dacorvo",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026",
+    "additions": 103,
+    "author": "javierdejesusda",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Fixes #44912 \u2014 MXFP4 quantization error messages combine `is_triton_available()` and `is_kernels_available()` into a single `kernels_available` boolean, making it impossible to identify which dependency is missing - Split the\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44680",
-    "created_at": "2026-03-13T17:55:54Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44930",
+    "created_at": "2026-03-22T17:27:20Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44680/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44680",
+    "files_url": "https://github.com/huggingface/transformers/pull/44930/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44930",
     "labels": [],
-    "merged": false,
-    "number": 44680,
-    "review_comments_count": 12,
-    "state": "open",
-    "title": "Allow kernel modules to declare their preferred mask function",
-    "updated_at": "2026-03-19T11:27:09Z"
+    "merged": true,
+    "number": 44930,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "fix: split MXFP4 dependency checks for specific error messages",
+    "updated_at": "2026-03-24T15:33:14Z"
   },
   {
-    "additions": 9,
-    "author": "JokeYoonic",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026",
+    "additions": 26,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Problem In `TokenizersBackend.convert_to_native_format()`, when a tokenizer has a custom `__init__` (the `elif` branch), `tokenizer.json` was parsed **twice**: 1. `TokenizerFast.from_file(fast_tokenizer_file)` \u2014 full Rust parse includin\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44676",
-    "created_at": "2026-03-13T16:28:01Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44927",
+    "created_at": "2026-03-22T15:33:23Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44676/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44676",
+    "files_url": "https://github.com/huggingface/transformers/pull/44927/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44927",
     "labels": [],
-    "merged": false,
-    "number": 44676,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights",
-    "updated_at": "2026-03-18T17:16:49Z"
+    "merged": true,
+    "number": 44927,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "fix: improve processor loading performance by avoiding redundant tokenizer parsing",
+    "updated_at": "2026-03-23T11:03:52Z"
   },
   {
-    "additions": 32,
-    "author": "stevhliu",
+    "additions": 25,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: <img width=\"976\" height=\"626\" alt=\"Screenshot 2026-03-13 at 9 09 39 AM\" src=\"https://github.com/user-attachments/assets/4390c6f7-bb63-4039-a46e-9f4ae23f5d98\" />",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Solve import errors when trying to import `from transformers.models.llama4.image_processing_llama4_fast import Llama4ImageProcessorFast` for example",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44675",
-    "created_at": "2026-03-13T16:10:28Z",
-    "deletions": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44926",
+    "created_at": "2026-03-22T14:46:17Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44675/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44675",
+    "files_url": "https://github.com/huggingface/transformers/pull/44926/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44926",
     "labels": [],
     "merged": true,
-    "number": 44675,
-    "review_comments_count": 0,
+    "number": 44926,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[docs] cb config",
-    "updated_at": "2026-03-13T23:15:04Z"
+    "title": "Fix backward compatibility for full path imports of Fast Image Processors",
+    "updated_at": "2026-03-23T14:16:49Z"
   },
   {
-    "additions": 408,
-    "author": "Rocketknight1",
+    "additions": 482,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "We've had `parse_response()` in the library for a while, but it's been a soft launch / prototype feature. This PR cleans it up and documents it, making it an official feature! The API is largely unchanged from the prototype, but we drop `x\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? This PR adds a first-class MoE routing capture/replay API for Qwen2Moe and introduces shared MoE routing helpers for reuse by other MoE model families. It adds: - a structured `MoERouting` payload in modeling output\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44674",
-    "created_at": "2026-03-13T15:41:42Z",
-    "deletions": 34,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44925",
+    "created_at": "2026-03-22T14:04:40Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44674/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44674",
+    "files_url": "https://github.com/huggingface/transformers/pull/44925/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44925",
     "labels": [],
-    "merged": true,
-    "number": 44674,
-    "review_comments_count": 11,
-    "state": "closed",
-    "title": "Officially launch parse_response",
-    "updated_at": "2026-03-24T15:55:05Z"
+    "merged": false,
+    "number": 44925,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[MOE]  MoE routing capture and replay support",
+    "updated_at": "2026-04-04T19:26:29Z"
   },
   {
-    "additions": 73,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR fixes a bug in continuous batching where non-CUDA devices cannot use the feature because some CUDA-exclusive objects are always instantiated. It also adds a test to make sure this will not break again in the future.",
-    "changed_files": 3,
+    "additions": 9,
+    "author": "Qubitium",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix two `nogil` threading bugs (reproduced on 3.14) : 1. Continus Batching crashes with torch graph errors with 2 threads on 2 separate model instances (same model path, but two distinct instances). Cause is missing\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44673",
-    "created_at": "2026-03-13T15:37:01Z",
-    "deletions": 15,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44924",
+    "created_at": "2026-03-22T11:46:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44673/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44673",
+    "files_url": "https://github.com/huggingface/transformers/pull/44924/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44924",
     "labels": [],
     "merged": true,
-    "number": 44673,
-    "review_comments_count": 0,
+    "number": 44924,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[CB] [Bug] Fix crashes when running without cuda",
-    "updated_at": "2026-03-15T23:59:55Z"
+    "title": "Continuous batching thread safety",
+    "updated_at": "2026-03-24T05:42:56Z"
   },
   {
-    "additions": 1,
-    "author": "neo",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 3,
+    "author": "prakhar-agarwal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Addresses issue #44843. Verified with isolated repro logic. Changes made: Updated the logic to properly identify local and offline scenarios upfront. Now, is_local is correctly set to True if: 1. is_offline_mode() is active. 2. The local_f\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44672",
-    "created_at": "2026-03-13T15:33:15Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44923",
+    "created_at": "2026-03-22T05:20:22Z",
     "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44672/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44672",
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44923/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44923",
     "labels": [],
-    "merged": true,
-    "number": 44672,
+    "merged": false,
+    "number": 44923,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix annotations reader for python 3.14 in `PreTrainedModel`",
-    "updated_at": "2026-03-19T13:30:48Z"
+    "state": "open",
+    "title": "fix: avoid unconditional model_info call in _patch_mistral_regex",
+    "updated_at": "2026-03-22T05:24:11Z"
   },
   {
-    "additions": 6,
-    "author": "dacorvo",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44678 ## Summary - Replace advanced indexing (`self.gate_up_proj[expert_ids]`) with explicit `torch.index_select(self.gate_up_proj, 0, expert_ids)` in `batched_mm_experts_forward` - 6 replacements total (3 weight tensors + 3 bias te\u2026",
+    "additions": 10,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44849. When `output_hidden_states=True` (or `output_attentions=True`) is passed to `model.generate()`, the `@capture_outputs` decorator reads the flag value but leaves it in `**kwargs`. These flags then prop\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44669",
-    "created_at": "2026-03-13T14:52:22Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44922",
+    "created_at": "2026-03-22T01:21:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44669/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44669",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44922/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44922",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44669,
+    "number": 44922,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Use `index_select` instead of advanced indexing in `batched_mm_experts_forward`",
-    "updated_at": "2026-03-19T13:39:23Z"
+    "title": "fix: pop output_* flags from kwargs in capture_outputs to prevent submodule leakage",
+    "updated_at": "2026-03-23T12:38:56Z"
   },
   {
-    "additions": 18,
-    "author": "dacorvo",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44677 ## Summary - Add `base_model_tp_plan` to `OlmoeConfig`, enabling `from_pretrained(tp_plan=\"auto\")` for OLMoE models - Add `TensorParallelTesterMixin` to OLMoE tests for TP validation coverage - Uses `\"colwise\"` for `q_norm` an\u2026",
-    "changed_files": 2,
+    "additions": 4,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44918. `compute_3d_position_ids` in the Qwen2.5-VL / Qwen3-VL / Qwen3.5 model families destructures `inputs_embeds.shape` into exactly three variables: ```python batch_size, seq_length, _ = inputs_embeds.sha\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44668",
-    "created_at": "2026-03-13T14:45:22Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44921",
+    "created_at": "2026-03-22T00:39:01Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44668/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44668",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44921/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44921",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44668,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Add `base_model_tp_plan` to `OlmoeConfig`",
-    "updated_at": "2026-03-24T15:20:34Z"
+    "number": 44921,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: use shape index access in compute_3d_position_ids for Qwen VL models",
+    "updated_at": "2026-03-23T10:00:51Z"
   },
   {
-    "additions": 412,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A few find-and-replaces and cache position is deleted from the rest of models. Still have to check docs and test files, so WIP",
-    "changed_files": 120,
+    "additions": 15,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `num_labels` not being propagated from `Qwen3_5Config` to its `text_config` when loading via `AutoConfig.from_pretrained(model, num_labels=N)`. **Root cause:** `Qwen3_5Config.__post_init__` initializes `text_\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44667",
-    "created_at": "2026-03-13T14:37:26Z",
-    "deletions": 1519,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44920",
+    "created_at": "2026-03-22T00:01:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44667/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44667",
+    "files_url": "https://github.com/huggingface/transformers/pull/44920/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44920",
     "labels": [],
     "merged": false,
-    "number": 44667,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Goodbye cache position",
-    "updated_at": "2026-03-19T11:55:04Z"
-  },
-  {
-    "additions": 17,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - 2 model tester classes didn't follow the usual way we do things, which cause the tiny model creation script to fail with those model classes. - (the script initializes instances of model testers, in order to call\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44666",
-    "created_at": "2026-03-13T14:24:50Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44666/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44666",
-    "labels": [],
-    "merged": true,
-    "number": 44666,
+    "number": 44920,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Another (small) set of fixes required for tiny model creation",
-    "updated_at": "2026-03-13T17:20:52Z"
+    "title": "fix: propagate num_labels/id2label to text_config in Qwen3_5Config",
+    "updated_at": "2026-03-23T12:06:04Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Dinov3 vit was refactored to introduce a module between top level and layers to have the capture decorators work as intended. Otherwise, it would force the backbone to do manual collection. This introduced a small conversion which is now a\u2026",
+    "additions": 18,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `Qwen2_5_VLProcessor.__call__` when processing batched inputs without padding (`padding=False`). **Root cause:** When the tokenizer returns sequences of different lengths (ragged list), `np.array(t\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44665",
-    "created_at": "2026-03-13T13:50:26Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44919",
+    "created_at": "2026-03-21T23:57:37Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44665/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44665",
+    "files_url": "https://github.com/huggingface/transformers/pull/44919/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44919",
     "labels": [],
-    "merged": true,
-    "number": 44665,
+    "merged": false,
+    "number": 44919,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Chmv2`] Fix conversion after capture refactor",
-    "updated_at": "2026-03-13T14:28:33Z"
+    "title": "fix: handle ragged batch inputs in Qwen2_5_VLProcessor mm_token_type_ids computation",
+    "updated_at": "2026-03-23T10:38:30Z"
   },
   {
-    "additions": 12,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/44625 We can create a clf model with LLM or VLM backbone like: ```python from transformers import AutoConfig, AutoModelForSequenceClassification model_name = \"onnx-internal-testing/t\u2026",
-    "changed_files": 6,
+    "additions": 5,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary `GPTNeoXConfig.convert_rope_params_to_dict` unconditionally overwrote `rope_parameters[\"partial_rotary_factor\"]` with the default `0.25` when `rotary_pct` was absent from kwargs. On every `from_pretrained` call, `rotary_pct` is\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44664",
-    "created_at": "2026-03-13T13:39:52Z",
-    "deletions": 182,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44917",
+    "created_at": "2026-03-21T23:34:32Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44664/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44664",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44917/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44917",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44664,
+    "number": 44917,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Generic Sequence Classifier works for multimodal models",
-    "updated_at": "2026-03-13T15:41:29Z"
+    "state": "closed",
+    "title": "fix(gpt-neox): preserve rotary_pct across save/load cycle",
+    "updated_at": "2026-03-23T12:37:48Z"
   },
   {
-    "additions": 1,
-    "author": "Ker102",
+    "additions": 8,
+    "author": "s-zx",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44661 \u2014 `transformers add-new-model-like` crashes with `AttributeError: 'ModelInfos' object has no attribute 'tokenizer_class'` when selecting a model that is in `TOKENIZER_MAPPING_NAMES`. ## Root Cause PR #40936 refactor\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## Summary Importing `DebertaV2Model` (or anything that depends on it, e.g. `gliner`) raises `IndentationError` on Python 3.13 because `torch.jit.script` calls `inspect.getsource()`, dedents the snippet, and passes it to `ast.parse()`. Pyt\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44663",
-    "created_at": "2026-03-13T13:25:48Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44916",
+    "created_at": "2026-03-21T23:34:07Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44663/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44663",
+    "files_url": "https://github.com/huggingface/transformers/pull/44916/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44916",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44663,
+    "number": 44916,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: restore missing `tokenizer_class` attribute in `ModelInfos.__init__`",
-    "updated_at": "2026-03-13T14:02:00Z"
+    "title": "fix(deberta-v2): move \"Copied from\" comments above @torch.jit.script for Python 3.13 compat",
+    "updated_at": "2026-03-23T12:34:24Z"
   },
   {
-    "additions": 7084,
-    "author": "CyrilSterling",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR supports PenguinVL model. Paper: https://arxiv.org/abs/2603.06569 Github repo: https://github.com/tencent-ailab/Penguin-VL HuggingFace Model: https://huggingface.co/collections/tencent/ai-lab ## Before submi\u2026",
-    "changed_files": 20,
+    "additions": 90,
+    "author": "maxsloef-goodfire",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? `clean_up_tokenization` applies English-specific string replacements (` .` \u2192 `.`, ` ?` \u2192 `?`, ` ,` \u2192 `,`, etc.) to decoded text. This was designed for BERT-era WordPiece tokenizers where decoding produced artifacts\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44662",
-    "created_at": "2026-03-13T13:02:26Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44915",
+    "created_at": "2026-03-21T20:45:03Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44662/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44662",
+    "files_url": "https://github.com/huggingface/transformers/pull/44915/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44915",
     "labels": [],
     "merged": false,
-    "number": 44662,
-    "review_comments_count": 94,
+    "number": 44915,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "[model] Add PenguinVL implementation",
-    "updated_at": "2026-03-20T10:00:17Z"
+    "title": "fix: skip `clean_up_tokenization` for BPE tokenizers in `PreTrainedTokenizerFast`",
+    "updated_at": "2026-03-23T18:45:52Z"
   },
   {
-    "additions": 5,
-    "author": "DogWala",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44637 This PR makes the PEFT `load_best_model_at_end` path in `Trainer` use a CPU-first adapter reload path during best-model loading. Previously, when training a PEFT model, `Trainer` could reload the best a\u2026",
+    "additions": 1,
+    "author": "maxsloef-goodfire",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? The `Llama3Converter` in `convert_llama_weights_to_hf.py` hardcodes `clean_up_tokenization_spaces=True` (line 468). This causes `tokenizer.decode()` to silently strip spaces before punctuation for all converted Lla\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44660",
-    "created_at": "2026-03-13T12:59:25Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44914",
+    "created_at": "2026-03-21T20:25:51Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44660/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44660",
+    "files_url": "https://github.com/huggingface/transformers/pull/44914/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44914",
     "labels": [],
-    "merged": false,
-    "number": 44660,
+    "merged": true,
+    "number": 44914,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: avoid late CUDA OOM in load_best_model_at_end with PEFT models",
-    "updated_at": "2026-03-13T13:18:52Z"
+    "state": "closed",
+    "title": "fix: set `clean_up_tokenization_spaces=False` in Llama 3 tokenizer conversion",
+    "updated_at": "2026-03-23T08:38:18Z"
   },
   {
-    "additions": 0,
-    "author": "Olexandr88",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Removes outdated use_diff entry from the docstring. The parameter is not present in the method signature or implementation.",
+    "additions": 8,
+    "author": "ouroborosscr",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "Qwen3.5 uses 3D position_ids [3, batch, seq_len] for multi-dimensional rotary embedding. _is_packed_sequence() misinterprets this as a packed sequence, causing cu_seqlens to be constructed with 3x the actual token count. Flash attention th\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44659",
-    "created_at": "2026-03-13T11:08:13Z",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44911",
+    "created_at": "2026-03-21T15:42:57Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44659/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44659",
+    "files_url": "https://github.com/huggingface/transformers/pull/44911/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44911",
     "labels": [],
     "merged": false,
-    "number": 44659,
+    "number": 44911,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "docs: remove outdated use_diff docstring from DistributedConfig.to_js\u2026",
-    "updated_at": "2026-03-13T13:42:29Z"
+    "state": "closed",
+    "title": "Fix flash attention crash with 3D position_ids (Qwen3.5)",
+    "updated_at": "2026-03-24T14:35:57Z"
   },
   {
-    "additions": 18,
-    "author": "Charly21r",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an issue where the `.modelcard` attribute of a pipeline is not initialized. Without this fix, calling `save_pretrained` on a pipeline (e.g., ASR pipeline) raises an `AttributeError` because `.modelcard` does n\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "anshuS1310",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "The `get_scheduler` function was identifying the `inverse_sqrt` scheduler type but failing to pass `**scheduler_specific_kwargs` to the underlying `get_inverse_sqrt_schedule` function. This caused user-defined parameters like `timescale` t\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44658",
-    "created_at": "2026-03-13T10:36:22Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44909",
+    "created_at": "2026-03-21T09:59:07Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44658/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44658",
+    "files_url": "https://github.com/huggingface/transformers/pull/44909/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44909",
     "labels": [],
-    "merged": false,
-    "number": 44658,
+    "merged": true,
+    "number": 44909,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: fix Pipeline's save_pretrained method (issue #44655)",
-    "updated_at": "2026-03-13T14:08:27Z"
+    "title": "Fix: Update optimization.py",
+    "updated_at": "2026-03-24T13:06:15Z"
   },
   {
-    "additions": 1,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review. Thx!",
-    "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44657",
-    "created_at": "2026-03-13T10:25:07Z",
-    "deletions": 0,
+    "additions": 200,
+    "author": "syncdoth",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44906 ## Summary - Remove `.expand_as(inputs_embeds)` from placeholder mask creation in `get_placeholder_mask` and equivalent inline patterns across all VLM models. `masked_scatter` natively broadcasts `(B, S, 1)` \u2192 `(B, S, H)`, mak\u2026",
+    "changed_files": 71,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44907",
+    "created_at": "2026-03-21T06:07:35Z",
+    "deletions": 222,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44657/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44657",
+    "files_url": "https://github.com/huggingface/transformers/pull/44907/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44907",
     "labels": [],
-    "merged": true,
-    "number": 44657,
+    "merged": false,
+    "number": 44907,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix bug embedding_size mismatch with hidden_size in electra model test",
-    "updated_at": "2026-03-18T10:22:22Z"
+    "state": "open",
+    "title": "Remove unnecessary expand_as in get_placeholder_mask across VLMs",
+    "updated_at": "2026-03-23T12:20:03Z"
   },
   {
-    "additions": 80,
-    "author": "juliendenize",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR add support to mistral-common v1.10.0. This involves: - reasoning effort feature - Python 3.14 Also add `image_sizes` in `apply_chat_template` output to match what is expected by standard processor.",
-    "changed_files": 4,
+    "additions": 13,
+    "author": "NicoleRobin",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - 13 i18n README files used `./awesome-transformers.md` which resolves relative to the `i18n/` directory and leads to a 404 - Replace with the absolute GitHub URL so links work from any location - `README_ko.md` was already corr\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44656",
-    "created_at": "2026-03-13T10:24:11Z",
-    "deletions": 15,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44905",
+    "created_at": "2026-03-21T03:25:56Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44656/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44656",
+    "files_url": "https://github.com/huggingface/transformers/pull/44905/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44905",
     "labels": [],
     "merged": true,
-    "number": 44656,
-    "review_comments_count": 1,
+    "number": 44905,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[MistralCommonBackend] Upgrade mistral-common to v1.10.0",
-    "updated_at": "2026-03-16T12:46:29Z"
+    "title": "fix(i18n): replace broken relative links to awesome-transformers.md with absolute URLs",
+    "updated_at": "2026-03-23T12:47:56Z"
   },
   {
-    "additions": 13,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes the AWQ test suite to align with the GPTQModel migration (following #41567 and #42776). ### Changes - **Fix `replace_with_awq_linear` return value**: The function now returns the model directly instead of a tuple `(model, _)`\u2026",
-    "changed_files": 1,
+    "additions": 101,
+    "author": "vivekvar-dl",
+    "author_association": "NONE",
+    "body_excerpt": "# Fix granite_speech config loading failure with int multiplier fields ## Fixes #44877 ### Problem Loading `granite_speech` configs fails with `StrictDataclassFieldValidationError` when multiplier fields (e.g., `embedding_multiplier`) are\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44654",
-    "created_at": "2026-03-13T07:31:19Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44904",
+    "created_at": "2026-03-21T03:12:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44654/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44654",
-    "labels": [],
-    "merged": true,
-    "number": 44654,
+    "files_url": "https://github.com/huggingface/transformers/pull/44904/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44904",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44904,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AWQ tests for GPTQModel migration",
-    "updated_at": "2026-03-13T16:28:14Z"
+    "title": "fix(granite_speech): convert int to float for multiplier fields in text_config",
+    "updated_at": "2026-03-23T10:37:38Z"
   },
   {
-    "additions": 18,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@zucchini-nlp, can you help review? Thx! unit tests to reproduce this bug: `tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py::Phi4MultimodalIntegrationTest::test_audio_text_generation`",
+    "additions": 16,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Some remote code models are using `get_size_dict` directly, and now that size is converted to SizeDict in init, we need to support it as input in `get_size_dict`",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44653",
-    "created_at": "2026-03-13T07:14:25Z",
-    "deletions": 9,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44903",
+    "created_at": "2026-03-21T01:25:53Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44653/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44653",
+    "files_url": "https://github.com/huggingface/transformers/pull/44903/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44903",
     "labels": [],
-    "merged": false,
-    "number": 44653,
-    "review_comments_count": 7,
+    "merged": true,
+    "number": 44903,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Fix `AutoImageProcessor` to correctly detect local implementation whe\u2026",
-    "updated_at": "2026-03-20T10:33:32Z"
+    "title": "Support SizeDict import in get_size_dict",
+    "updated_at": "2026-03-23T10:28:52Z"
   },
   {
-    "additions": 8,
-    "author": "gambletan",
+    "additions": 3,
+    "author": "guoyangzhen",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44623 `processor.save_pretrained()` in v5 only saves the unified `processor_config.json` with nested sub-processor configs, but does not save standalone config files like `preprocessor_config.json` for the image processor\u2026",
+    "body_excerpt": "## Problem `_split_tokens_on_unicode()` crashes with `IndexError: string index out of range` when the decoded token stream ends with a dangling Unicode replacement character (\\uFFFD). The computed index `unicode_offset + decoded.index(repl\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44652",
-    "created_at": "2026-03-13T05:38:10Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44902",
+    "created_at": "2026-03-20T22:08:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44652/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44652",
+    "files_url": "https://github.com/huggingface/transformers/pull/44902/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44902",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44652,
+    "number": 44902,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix processor.save_pretrained missing sub-processor config files",
-    "updated_at": "2026-03-13T12:03:37Z"
+    "title": "fix: Whisper word timestamp OOB access on trailing replacement char",
+    "updated_at": "2026-03-23T11:59:14Z"
   },
   {
-    "additions": 10,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44625 When passing `num_labels` to `AutoConfig.from_pretrained` for Qwen3.5, the value is set on the outer `Qwen3_5Config` but not propagated to `text_config`. This causes `AutoModelForSequenceClassification` to use the d\u2026",
+    "additions": 19,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing Perceiver use case was identified and fixed in this PR: \u2192 c6d2848a23 ([\ud83d\udea8 Fix torch.jit.trace for interpolate_pos_encoding in all vision models](https://github.com/huggingface/transformers/pul\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44651",
-    "created_at": "2026-03-13T05:35:29Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44899",
+    "created_at": "2026-03-20T20:02:10Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44651/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44651",
+    "files_url": "https://github.com/huggingface/transformers/pull/44899/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44899",
     "labels": [],
-    "merged": false,
-    "number": 44651,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44899,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
-    "updated_at": "2026-03-13T13:39:43Z"
+    "title": "fix(models): Fix Perceiver interpolate_pos_encoding interpolating to the source size",
+    "updated_at": "2026-03-25T11:54:23Z"
   },
   {
-    "additions": 188,
-    "author": "shaealh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Closes #44593 ## Summary - use generation_input_ids/generation_attention_mask when provided for decoder-only models - otherwise infer prompt from leading -100 labels and build left-padded prompt batch - return completion tokens for decoder\u2026",
+    "additions": 14,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add compatibility with remote code importing image_processing_utils_fast modules and methods using `from transformers.image_processing_utils_fast import ...`",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44650",
-    "created_at": "2026-03-13T05:33:59Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44897",
+    "created_at": "2026-03-20T19:30:32Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44650/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44650",
+    "files_url": "https://github.com/huggingface/transformers/pull/44897/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44897",
     "labels": [],
-    "merged": false,
-    "number": 44650,
+    "merged": true,
+    "number": 44897,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix Seq2SeqTrainer generation path for decoder-only models",
-    "updated_at": "2026-03-13T13:35:36Z"
+    "state": "closed",
+    "title": "Add backward compatibility for direct imports from legacy `image_processing_utils_fast`",
+    "updated_at": "2026-03-20T20:00:12Z"
   },
   {
-    "additions": 12,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44623 In v5.3.0, `ProcessorMixin.save_pretrained()` no longer calls `save_pretrained()` on non-tokenizer components (e.g. `image_processor`, `feature_extractor`). This means their individual config files (e.g. `preprocess\u2026",
-    "changed_files": 1,
+    "additions": 354,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates the continuous batching docs - new page for the API reference - adds sections for new features like CUDA graphs, async batching, prefix caching, logprobs (depending on when its merged) - clearer example of generation with varying l\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44649",
-    "created_at": "2026-03-13T05:22:42Z",
-    "deletions": 207,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44896",
+    "created_at": "2026-03-20T19:09:41Z",
+    "deletions": 81,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44649/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44649",
+    "files_url": "https://github.com/huggingface/transformers/pull/44896/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44896",
     "labels": [],
-    "merged": false,
-    "number": 44649,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44896,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix: ensure processor.save_pretrained saves all component files",
-    "updated_at": "2026-03-13T05:36:54Z"
+    "title": "[docs] continuous batching",
+    "updated_at": "2026-03-30T17:17:13Z"
   },
   {
-    "additions": 0,
-    "author": "gambletan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44625 When `num_labels` or `id2label` are passed to `Qwen3_5Config` (e.g. via `AutoConfig.from_pretrained(model_name, num_labels=1)`), they are only set on the outer composite config but **not forwarded to `text_config`**\u2026",
-    "changed_files": 0,
+    "additions": 57,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR enables static FP8 experts. This also works on multi-gpu with device-map. A fix for that was to set was to set `torch.cuda.set_device()`. Triton's JIT compiler uses he active device context to determine whic\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44648",
-    "created_at": "2026-03-13T05:22:03Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44895",
+    "created_at": "2026-03-20T19:01:35Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44648/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44648",
+    "files_url": "https://github.com/huggingface/transformers/pull/44895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44895",
     "labels": [],
-    "merged": false,
-    "number": 44648,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44895,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix: propagate num_labels to text_config in Qwen3.5",
-    "updated_at": "2026-03-13T05:33:26Z"
+    "title": "Add static FP8 expert support ",
+    "updated_at": "2026-03-24T14:27:31Z"
   },
   {
-    "additions": 9,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@remi-or @ArthurZucker @McPatate pls help review, thx!",
+    "additions": 10,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Problem `ProcessorMixin.to_dict()` was calling `copy.deepcopy(self.__dict__)` on the entire processor, including the tokenizer, even though the tokenizer is always deleted from the output immediately after (since tokenizers are saved se\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44647",
-    "created_at": "2026-03-13T04:55:26Z",
-    "deletions": 6,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44894",
+    "created_at": "2026-03-20T18:57:53Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44647/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44647",
+    "files_url": "https://github.com/huggingface/transformers/pull/44894/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44894",
     "labels": [],
-    "merged": false,
-    "number": 44647,
+    "merged": true,
+    "number": 44894,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add more generic device support for continuous batching",
-    "updated_at": "2026-03-13T15:43:23Z"
+    "title": "fix `processing_utils.py`: avoid deepcopying tokenizer in `ProcessorMixin` to improve performance",
+    "updated_at": "2026-03-23T10:09:02Z"
   },
   {
-    "additions": 4,
-    "author": "LincolnBurrows2017",
+    "additions": 18,
+    "author": "ai-man-codes",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixed typo in Qwen3.5 and Qwen3-VL-MoE model files ```diff - Since Qwen3.5 use timestamps to seperate videos + Since Qwen3.5 use timestamps to separate videos ```",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes #43011 `StaticLayer` was missing a `.crop(max_length)` method, so implemented that according to the discussion of the issue. Added `StaticLayer.crop(max_length)` to match the API of StaticCache with the Dynami\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44646",
-    "created_at": "2026-03-13T04:48:06Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44893",
+    "created_at": "2026-03-20T17:48:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44646/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44646",
+    "files_url": "https://github.com/huggingface/transformers/pull/44893/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44893",
     "labels": [],
     "merged": false,
-    "number": 44646,
+    "number": 44893,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix typo: seperate -> separate",
-    "updated_at": "2026-03-13T15:27:08Z"
+    "title": "add `StaticLayer.crop()` to match `DynamicLayer` API",
+    "updated_at": "2026-03-28T01:41:22Z"
   },
   {
-    "additions": 2,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- Intel XPU: @IlyasMoutawwakil ``` ======================================================================== FAILURES ======================================================================== _________________________________________________\u2026",
-    "changed_files": 1,
+    "additions": 51,
+    "author": "he-yufeng",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44821 The `elif is_remote_url(...)` / `download_url(...)` branch in `get_image_processor_dict` was accidentally removed during the image processor refactor in #43514. This caused `AutoImageProcessor.from_pretrained(url)` to break wi\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44645",
-    "created_at": "2026-03-13T02:53:31Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44892",
+    "created_at": "2026-03-20T16:21:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44645/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44645",
+    "files_url": "https://github.com/huggingface/transformers/pull/44892/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44892",
     "labels": [],
     "merged": false,
-    "number": 44645,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix Image.open failure in case \"tests/models/prompt_depth_anything/te\u2026",
-    "updated_at": "2026-03-26T09:59:37Z"
+    "number": 44892,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix AutoImageProcessor.from_pretrained failing on URL input",
+    "updated_at": "2026-03-24T13:30:38Z"
   },
   {
-    "additions": 2,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026MXFP4PseudoquantTest::test_quantized_model fail in xpu ## Who can review? - quantization: @SunMarc - Intel XPU: @IlyasMoutawwakil",
-    "changed_files": 1,
+    "additions": 507,
+    "author": "kashif",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add a MoERouterHealthCallback to log MoE router-health metrics. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44644",
-    "created_at": "2026-03-13T02:02:45Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44891",
+    "created_at": "2026-03-20T16:17:05Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44644/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44644",
+    "files_url": "https://github.com/huggingface/transformers/pull/44891/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44891",
     "labels": [],
     "merged": false,
-    "number": 44644,
-    "review_comments_count": 6,
+    "number": 44891,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "fix tests/quantization/fp_quant_integration/test_fp_quant.py::FPQuant\u2026",
-    "updated_at": "2026-03-26T05:58:40Z"
+    "title": "[Trainer] add MoERouterHealthCallback Callback",
+    "updated_at": "2026-03-20T16:28:43Z"
   },
   {
-    "additions": 5,
-    "author": "joshuaswanson",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "The generic `labels` docstring in `ModelArgs` says \"masked language modeling loss\" and doesn't mention that causal LM models shift labels internally. This has tripped up a lot of users who pre-shift their labels and end up training next-ne\u2026",
-    "changed_files": 1,
+    "additions": 72,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "As discussed on Slack, this is the first phase of our approach to controlling the code agent epidemic. This PR places large warnings in both the pull request template and `CONTRIBUTING.md`, which should hopefully be seen by most contributo\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44642",
-    "created_at": "2026-03-12T23:47:11Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44890",
+    "created_at": "2026-03-20T16:12:45Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44642/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44642",
+    "files_url": "https://github.com/huggingface/transformers/pull/44890/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44890",
     "labels": [],
-    "merged": false,
-    "number": 44642,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Clarify that causal LM labels are shifted internally",
-    "updated_at": "2026-03-13T00:02:30Z"
+    "merged": true,
+    "number": 44890,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Add big angry code agent warnings!",
+    "updated_at": "2026-03-23T11:54:48Z"
   },
   {
-    "additions": 1,
-    "author": "kmbhattt-aws",
+    "additions": 86,
+    "author": "roycho96",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Issue: A full 4D attention mask of shape `[1, 1, seq_len, seq_len]` is being created during attention, even when not using alibi for positional embeddings. - This occupied extra memory during training. Root Cause: T\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? Calling `trainer.evaluate()` before `trainer.train()` with DeepSpeed is broken in three ways: 1. **ZeRO-3 stale state crash:** `evaluate()` creates an inference engine. `train()` starts with `accelerator.free_memor\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44641",
-    "created_at": "2026-03-12T23:01:11Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44641/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44641",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44889",
+    "created_at": "2026-03-20T15:08:32Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44889/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44889",
     "labels": [],
     "merged": false,
-    "number": 44641,
-    "review_comments_count": 1,
+    "number": 44889,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Conditinally passing and_mask_function arg to create_causal_mask ",
-    "updated_at": "2026-03-13T02:09:22Z"
+    "title": "[DeepSpeed] Fix evaluate()/predict() before train()",
+    "updated_at": "2026-03-21T11:06:07Z"
   },
   {
-    "additions": 11,
-    "author": "michaelbenayoun",
+    "additions": 2,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Add `\"embedding_colwise\"` tp plan - Add register methods for `ParallelInterface`. Without it, we can register plans with the register method, but not the corresponding `plan_to_weight_dim` and `plan_to_bias_dim`.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? In general, it should be much better to let the kernel do what it wants for perfs! There's no reasons to have troubles from it!",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44640",
-    "created_at": "2026-03-12T20:14:06Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44888",
+    "created_at": "2026-03-20T14:45:28Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44640/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44640",
+    "files_url": "https://github.com/huggingface/transformers/pull/44888/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44888",
     "labels": [],
-    "merged": true,
-    "number": 44640,
+    "merged": false,
+    "number": 44888,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add register method for `ParallelInterface`",
-    "updated_at": "2026-03-13T18:57:48Z"
+    "state": "open",
+    "title": "Remove explicit cuda stream in nemotron_h",
+    "updated_at": "2026-03-26T19:15:29Z"
   },
   {
-    "additions": 24099,
-    "author": "ArthurZucker",
+    "additions": 2,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "## Summary Fixes bugs introduced during the `__init__` \u2192 `@dataclass` conversion in #41250. All are incorrect default values caught by automated comparison of `__init__` signatures (main) vs dataclass fields (PR). | Model | Param | Was | S\u2026",
-    "changed_files": 931,
+    "body_excerpt": "# What does this PR do? As per the title. On currently pinned version, when we run this small snippet (which is called on some model's `__init__` functions \ud83d\ude05): ```python from transformers.integrations.hub_kernels import lazy_load_kernel ca\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44639",
-    "created_at": "2026-03-12T16:49:54Z",
-    "deletions": 38773,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44887",
+    "created_at": "2026-03-20T14:00:33Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44639/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44639",
+    "files_url": "https://github.com/huggingface/transformers/pull/44887/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44887",
     "labels": [],
-    "merged": false,
-    "number": 44639,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44887,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix incorrect default values in config dataclass migration",
-    "updated_at": "2026-03-12T16:50:10Z"
+    "title": "Bump kernels version dependency to avoid crashes",
+    "updated_at": "2026-03-20T19:01:51Z"
   },
   {
-    "additions": 19,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/44614. This one is the result of a long debugging session and discussion with @vasqu. The issue is as follow: - Backbone ALWAYS need to c\u2026",
-    "changed_files": 3,
+    "additions": 14,
+    "author": "m-matthias",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Prevent crash in class LwDetrImageLoss when using it with float16 automatic mixed precision on a Cuda device. torch.pow causes an autocast to float32 when used with Cuda, which caused a type mismatch at ``` pos_weig\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44638",
-    "created_at": "2026-03-12T16:19:49Z",
-    "deletions": 10,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44638/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44638",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44886",
+    "created_at": "2026-03-20T13:56:08Z",
+    "deletions": 12,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44886/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44886",
     "labels": [],
     "merged": true,
-    "number": 44638,
-    "review_comments_count": 2,
+    "number": 44886,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix output capturing for Backbones",
-    "updated_at": "2026-03-12T17:11:32Z"
+    "title": "LwDetrImageLoss: Fix dtype casting to prevent crash when using amp on cuda device",
+    "updated_at": "2026-03-24T17:02:32Z"
   },
   {
-    "additions": 571,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Adds an `httpx` tracer to gather metrics about network calls - Collect and store metrics and generates an artifact in CI - Can be used locally with `DEBUG_NETWORK` - Activated in CircleCI example of local run: ```\u2026",
-    "changed_files": 4,
+    "additions": 2,
+    "author": "guoyangzhen",
+    "author_association": "NONE",
+    "body_excerpt": "## Problem In _split_tokens_on_unicode(), when the decoded token stream ends with a dangling Unicode replacement character (U+FFFD), the computed index can equal len(decoded_full), causing IndexError: string index out of range. The failing\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44636",
-    "created_at": "2026-03-12T15:25:10Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44885",
+    "created_at": "2026-03-20T13:03:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44636/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44636",
-    "labels": [],
-    "merged": true,
-    "number": 44636,
-    "review_comments_count": 5,
+    "files_url": "https://github.com/huggingface/transformers/pull/44885/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44885",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44885,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feat(ci): added a network debug report",
-    "updated_at": "2026-03-18T19:19:03Z"
+    "title": "fix: prevent IndexError in Whisper word timestamp decode",
+    "updated_at": "2026-03-23T12:01:50Z"
   },
   {
-    "additions": 111,
-    "author": "RyanMullins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Per a conversion with @Cyrilvallez on Slack on how to make Gemma models more compatible with modular inheritance, this PR: * Updates Gemma models to use `nn.parameter.Buffer` instead of a `self.register_buffer()` fo\u2026",
-    "changed_files": 32,
+    "additions": 14,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some libraries that use Transformers (i.e. vLLM) use `|` on the `size` config. This PR adds `__or__` and `__ror__` so that the following works: ```console $ {\"longest_edge\": 20} | SizeDict(height=10, width=20) {'longest_edge': 20, 'height'\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44635",
-    "created_at": "2026-03-12T14:47:46Z",
-    "deletions": 87,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44635/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44635",
-    "labels": [],
-    "merged": false,
-    "number": 44635,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[Gemma] Modular-friendly buffers",
-    "updated_at": "2026-03-18T10:44:25Z"
-  },
-  {
-    "additions": 30,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
-    "changed_files": 3,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44634",
-    "created_at": "2026-03-12T14:04:36Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44884",
+    "created_at": "2026-03-20T11:52:15Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44634/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44634",
+    "files_url": "https://github.com/huggingface/transformers/pull/44884/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44884",
     "labels": [],
     "merged": true,
-    "number": 44634,
-    "review_comments_count": 1,
+    "number": 44884,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix lfm2 kernel path",
-    "updated_at": "2026-03-12T15:00:59Z"
+    "title": "Add missing dunder methods to `SizeDict`",
+    "updated_at": "2026-03-20T12:21:12Z"
   },
   {
-    "additions": 26,
-    "author": "eustlb",
+    "additions": 2,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title \ud83e\udd17",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/44589.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44633",
-    "created_at": "2026-03-12T13:35:48Z",
-    "deletions": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44883",
+    "created_at": "2026-03-20T11:43:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44633/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44633",
+    "files_url": "https://github.com/huggingface/transformers/pull/44883/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44883",
     "labels": [],
     "merged": true,
-    "number": 44633,
+    "number": 44883,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "[medasr] doc update",
-    "updated_at": "2026-03-16T09:39:50Z"
+    "title": "Fix dtype guessing from state dict",
+    "updated_at": "2026-03-20T13:12:34Z"
   },
   {
-    "additions": 35,
-    "author": "Abdennacer-Badaoui",
+    "additions": 1,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "Summary - Update test expectations for affected models - Add some needed dependencies - Fix TypeError: `GenerationMixin.prepare_inputs_for_generation()` got multiple values for argument 'next_sequence_length' in Qwen2.5-Omni talker by pass\u2026",
-    "changed_files": 6,
+    "body_excerpt": "fixes ```python model = \"meta-llama/Llama-4-Maverick-17B-128E-Instruct\" tok_auto = AutoTokenizer.from_pretrained(model) print(f\"AutoTokenizer: {tok_auto('hello')}\") ``` ``` The above exception was the direct cause of the following exceptio\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44632",
-    "created_at": "2026-03-12T13:32:33Z",
-    "deletions": 22,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44882",
+    "created_at": "2026-03-20T11:31:20Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44632/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44632",
+    "files_url": "https://github.com/huggingface/transformers/pull/44882/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44882",
     "labels": [],
-    "merged": true,
-    "number": 44632,
-    "review_comments_count": 6,
+    "merged": false,
+    "number": 44882,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[AMD CI] Fix test failures across important models ",
-    "updated_at": "2026-03-17T14:58:10Z"
+    "title": "fix config type",
+    "updated_at": "2026-03-20T16:34:20Z"
   },
   {
-    "additions": 33,
-    "author": "RyanMullins",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Updates the weights conversion scripts for Gemma to: * Use the new `SentencePieceExtractor` class to get the vocab and merges from the SPM * Always initialize and save the unified `GemmaTokenizer` class ## Before su\u2026",
-    "changed_files": 4,
+    "additions": 142,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, we don't need a weird way to filter out kwargs anymore because now we don't rely on `tokenizer.apply_chat_template`. I didn't delete the unused `TypedDict` yet and will deprecate for at least 3 minor r\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44631",
-    "created_at": "2026-03-12T13:32:25Z",
-    "deletions": 45,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44881",
+    "created_at": "2026-03-20T10:44:06Z",
+    "deletions": 82,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44631/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44631",
+    "files_url": "https://github.com/huggingface/transformers/pull/44881/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44881",
     "labels": [],
     "merged": true,
-    "number": 44631,
-    "review_comments_count": 0,
+    "number": 44881,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "[Gemma] Update conversion scripts for Transformers v5 Comaptibility",
-    "updated_at": "2026-03-18T10:39:53Z"
+    "title": "Allow arbitrary template kwargs in processors",
+    "updated_at": "2026-03-27T11:07:08Z"
   },
   {
-    "additions": 42,
-    "author": "MaybeSam05",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a bug where `num_labels` passed to `AutoConfig.from_pretrained` for Qwen3.5 did not propagate from the top\u2011level `Qwen3_5Config` into the `text_config`, so `AutoModelForSequenceClassification` still saw the de\u2026",
-    "changed_files": 2,
+    "additions": 34,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "incorrect model list update",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44630",
-    "created_at": "2026-03-12T13:25:54Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44880",
+    "created_at": "2026-03-20T10:37:13Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44630/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44630",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44630,
+    "files_url": "https://github.com/huggingface/transformers/pull/44880/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44880",
+    "labels": [],
+    "merged": true,
+    "number": 44880,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen3.5 num_labels propagation to text_config (fix #44625)",
-    "updated_at": "2026-03-12T13:46:07Z"
+    "title": "incorrect model list update",
+    "updated_at": "2026-03-24T09:27:24Z"
   },
   {
-    "additions": 15,
-    "author": "zucchini-nlp",
+    "additions": 448,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes \"auto\" dtype when the model is initialized `from_config` It was already fixed for `from_pretrained` in https://github.com/huggingface/transformers/pull/42990 but vLLM creates models with `AutoModel._from_confi\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? - uses the Makefile as a single source of truth for running QA checks - adds `tomli` so `make` commands can read the `toml` file when needed - adds a `checkers` Python module that wraps and orchestrates all `checks`\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44629",
-    "created_at": "2026-03-12T13:07:55Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44879",
+    "created_at": "2026-03-20T10:24:29Z",
+    "deletions": 90,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44629/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44629",
+    "files_url": "https://github.com/huggingface/transformers/pull/44879/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44879",
     "labels": [],
     "merged": true,
-    "number": 44629,
-    "review_comments_count": 3,
+    "number": 44879,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Ensure same `dtype` for subconfig when `_from_config`",
-    "updated_at": "2026-03-13T11:35:10Z"
+    "title": "refactor: unify QA calls",
+    "updated_at": "2026-03-25T08:51:30Z"
   },
   {
-    "additions": 37,
-    "author": "ydshieh",
+    "additions": 8,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - `encoder_config` and `decoder_config` should return `None` for encoder / decoder config classes themselves. - The encoder / decoder model classes should have the correct config classes associated to them <!-- CURS\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? `check_docstrings` has been complaining for a while about those.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44628",
-    "created_at": "2026-03-12T12:24:44Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44878",
+    "created_at": "2026-03-20T10:01:08Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44628/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44628",
+    "files_url": "https://github.com/huggingface/transformers/pull/44878/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44878",
     "labels": [],
     "merged": true,
-    "number": 44628,
-    "review_comments_count": 1,
+    "number": 44878,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix for `VibeVoiceAcousticTokenizer`",
-    "updated_at": "2026-03-12T13:33:02Z"
+    "title": "Fix nemotron config docstrings",
+    "updated_at": "2026-03-20T10:11:04Z"
   },
   {
-    "additions": 141,
-    "author": "zucchini-nlp",
+    "additions": 1,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The diff in revert mapping is needed, otherwise we get failures in a few models, see https://app.circleci.com/pipelines/github/huggingface/transformers/167425/workflows/fa96efe5-f810-408e-bafd-de03b7e881aa/jobs/2208\u2026",
-    "changed_files": 78,
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 16,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44627",
-    "created_at": "2026-03-12T12:00:31Z",
-    "deletions": 367,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44876",
+    "created_at": "2026-03-20T09:49:54Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44627/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44627",
+    "files_url": "https://github.com/huggingface/transformers/pull/44876/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44876",
     "labels": [],
     "merged": true,
-    "number": 44627,
-    "review_comments_count": 8,
+    "number": 44876,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Move VLM conversions to the main mapping",
-    "updated_at": "2026-03-17T10:13:03Z"
+    "title": "Fix nemotron_h modular",
+    "updated_at": "2026-03-20T10:00:35Z"
   },
   {
-    "additions": 11,
-    "author": "ArthurZucker",
+    "additions": 872,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds a missing branch. I don't really know if this is worth it, can't find a model online that enforces the flag to `True`",
+    "body_excerpt": "# What does this PR do? Refactors `src/transformers/cli/serve.py` to reduce nesting depth, eliminate code duplication, and improve maintainability. No behavioral changes and the public API is unchanged. Also added a module docstring to exp\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44626",
-    "created_at": "2026-03-12T11:23:21Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44626/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44626",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44875",
+    "created_at": "2026-03-20T09:06:34Z",
+    "deletions": 701,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44875/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44875",
     "labels": [],
     "merged": false,
-    "number": 44626,
-    "review_comments_count": 1,
+    "number": 44875,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "don't break legacy behavior when enforced!",
-    "updated_at": "2026-03-12T11:32:46Z"
+    "title": "refactor: improved the cli server module code organization",
+    "updated_at": "2026-03-23T08:08:17Z"
   },
   {
-    "additions": 34,
-    "author": "ydshieh",
+    "additions": 2,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of #44549",
-    "changed_files": 16,
+    "body_excerpt": "`Llama4`'s was incorrect and causing `StrictDataclassFieldValidationErrors`. `AFMoE`'s was was fine but now it's more specific.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44624",
-    "created_at": "2026-03-12T09:26:17Z",
-    "deletions": 34,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44874",
+    "created_at": "2026-03-20T09:05:02Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44624/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44624",
+    "files_url": "https://github.com/huggingface/transformers/pull/44874/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44874",
     "labels": [],
     "merged": true,
-    "number": 44624,
+    "number": 44874,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix more wrong HF hub checkpoint names",
-    "updated_at": "2026-03-12T09:59:12Z"
+    "title": "Fix `layer_types` type hint for `AFMoE` and `Llama4`",
+    "updated_at": "2026-03-20T12:03:58Z"
   },
   {
-    "additions": 17,
-    "author": "LysandreJik",
+    "additions": 75,
+    "author": "sergiopaniego",
     "author_association": "MEMBER",
-    "body_excerpt": "CB temporarily disabled on non-cuda devices as it's currently using cuda streams by default.",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44622",
-    "created_at": "2026-03-12T08:11:10Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44622/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44622",
-    "labels": [],
-    "merged": false,
-    "number": 44622,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "CB temporarily disabled on non-cuda devices",
-    "updated_at": "2026-03-16T00:00:20Z"
-  },
-  {
-    "additions": 0,
-    "author": "KoichiYasuoka",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43170 for `modelcard` removal Quick reproduce: ``` from transformers import pipeline fmp=pipeline(\"fill-mask\",\"google-bert/bert-base-cased\") fmp.save_pretrained(\"tmpdir\") ``` ## Before submitting - [ ] This P\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? ## Problem Online RL training (GRPO, RLOO, PPO) with all VL models using MRoPE with rope_deltas (Qwen2-VL, Qwen2.5-VL, Qwen3-VL, Qwen3.5, GLM4V, PaddleOCR-VL, Ernie4.5-VL-MoE, etc.) crashes with `RuntimeError: Sizes\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44621",
-    "created_at": "2026-03-12T08:04:29Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44873",
+    "created_at": "2026-03-20T08:38:03Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44621/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44621",
+    "files_url": "https://github.com/huggingface/transformers/pull/44873/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44873",
     "labels": [],
     "merged": true,
-    "number": 44621,
+    "number": 44873,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "pipelines do not have modelcard",
-    "updated_at": "2026-03-13T14:28:48Z"
+    "title": "Fix VL model rope_deltas batch size mismatch in online RL training",
+    "updated_at": "2026-03-20T13:51:08Z"
   },
   {
-    "additions": 15,
-    "author": "LysandreJik",
-    "author_association": "MEMBER",
-    "body_excerpt": "FastAPI doesn't play well with `from __future__ import annotations`. This PR reverts this change and correctly guards against unprotected optional imports. Reverts https://github.com/huggingface/transformers/pull/44256",
+    "additions": 2,
+    "author": "IvanFan-Van",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Update outdated comment that references non-existent file `generation_utils_samplers.py` ## Changes Detail - The comment on line 1200 states \"all samplers can be found in `generation_utils_samplers.py`\" - In reality, all sam\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44620",
-    "created_at": "2026-03-12T07:56:55Z",
-    "deletions": 16,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44872",
+    "created_at": "2026-03-20T05:45:46Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44620/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44620",
+    "files_url": "https://github.com/huggingface/transformers/pull/44872/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44872",
     "labels": [],
-    "merged": true,
-    "number": 44620,
+    "merged": false,
+    "number": 44872,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix transformers serve's 422 unprocessable entity",
-    "updated_at": "2026-03-16T13:41:44Z"
+    "state": "open",
+    "title": "Fix: Update outdated sampler comment in generation/utils.py",
+    "updated_at": "2026-03-20T05:45:46Z"
   },
   {
-    "additions": 43,
-    "author": "yunhaoli24",
+    "additions": 666,
+    "author": "JonusClapshaw",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Set `add_bos_token=True` and `add_eos_token=True` by default in `DebertaV2Tokenizer` to fix the regression where `add_special_tokens=True` doesn't add BOS/EOS tokens for `microsoft/mdeberta-v3-base` tokenizer in transformers >=5\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes #42200 `prediction_step` is type-hinted to return `Optional[torch.Tensor]` for logits, but when no `preprocess_logits_for_metrics` is provided it could return a tuple instead of a tensor. This caused `torch_pa\u2026",
+    "changed_files": 33,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44618",
-    "created_at": "2026-03-12T04:46:19Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44870",
+    "created_at": "2026-03-20T02:28:27Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44618/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44618",
+    "files_url": "https://github.com/huggingface/transformers/pull/44870/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44870",
     "labels": [],
     "merged": false,
-    "number": 44618,
+    "number": 44870,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Add BOS/EOS tokens by default for DeBERTa v2 tokenizer",
-    "updated_at": "2026-03-16T05:28:25Z"
+    "title": "fix: ensure prediction_step returns tensor for logits, not tuple #42200",
+    "updated_at": "2026-03-20T17:51:19Z"
   },
   {
-    "additions": 7,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Add fallback to bfloat16 when Float8 dtype fails to set, preventing TypeError when loading FP8 models on PyTorch builds without Float8_e4m3fnStorage support. ## Root Cause `torch.set_default_dtype(dtype)` raises `TypeError: coul\u2026",
-    "changed_files": 1,
+    "additions": 98,
+    "author": "sdharani91",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44717 This PR fixes packed-sequence handling for the Qwen3.5 linear-attention fast path. Before this change, Qwen3.5 produced different outputs for: a padded representation of multiple sequences a packed repr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44616",
-    "created_at": "2026-03-11T23:00:15Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44867",
+    "created_at": "2026-03-19T17:31:45Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44616/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44616",
+    "files_url": "https://github.com/huggingface/transformers/pull/44867/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44867",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44616,
+    "number": 44867,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add Float8 dtype fallback in modeling_utils.py",
-    "updated_at": "2026-03-18T16:02:54Z"
+    "title": "Pass packed boundary metadata to Qwen3.5 linear-attention fast kernels",
+    "updated_at": "2026-03-26T21:02:58Z"
   },
   {
-    "additions": 35,
-    "author": "MaybeSam05",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Restores `is_torch_fx_available` in `transformers.utils.import_utils` as a backwards-compatibility shim so that Hub models loaded with `trust_remote_code=True` that still import this symbol no longer raise `ImportEr\u2026",
-    "changed_files": 1,
+    "additions": 78,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44615",
-    "created_at": "2026-03-11T22:52:23Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44866",
+    "created_at": "2026-03-19T17:27:58Z",
+    "deletions": 75,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44615/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44615",
+    "files_url": "https://github.com/huggingface/transformers/pull/44866/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44866",
     "labels": [],
-    "merged": false,
-    "number": 44615,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Restore is_torch_fx_available for trust_remote_code backwards compatibility",
-    "updated_at": "2026-03-12T10:33:43Z"
+    "merged": true,
+    "number": 44866,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Align lfm2 cache to other mamba caches",
+    "updated_at": "2026-03-20T10:50:28Z"
   },
   {
-    "additions": 19,
-    "author": "vasqu",
+    "additions": 496,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "Only detected in #43590, so it can only be detected there for `test_sdpa_can_compile_dynamic` (`lw_detr`). Core issue: Dynamo can cache the attribute and ignore it across frames which means that updates/reads are not working as expected. T\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Added Rule 11 forward() must not access non-nn.Module attributes on submodules (breaks pipeline parallelism with Identity replacement). we want to make sure we just use metadata in config and elesewere when in that\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44614",
-    "created_at": "2026-03-11T20:49:51Z",
-    "deletions": 22,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44865",
+    "created_at": "2026-03-19T16:39:59Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44614/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44614",
+    "files_url": "https://github.com/huggingface/transformers/pull/44865/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44865",
     "labels": [],
-    "merged": false,
-    "number": 44614,
-    "review_comments_count": 10,
+    "merged": true,
+    "number": 44865,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[`Compile`] Fix capture outputs during compile",
-    "updated_at": "2026-03-13T02:15:21Z"
+    "title": "chore(typing): added rule 11",
+    "updated_at": "2026-03-23T12:29:21Z"
   },
   {
-    "additions": 105,
-    "author": "stevhliu",
+    "additions": 99,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "adds docs for tensor parallelism for training",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR switches FP8 per-tensor implementation to rely on the official torch impl `torch._scaled_mm`. Note that `torch._scaled_mm` don't explicitly support per tensor. We hack the api a bit as it only support per ro\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44613",
-    "created_at": "2026-03-11T20:43:53Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44864",
+    "created_at": "2026-03-19T16:19:53Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44613/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44613",
+    "files_url": "https://github.com/huggingface/transformers/pull/44864/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44864",
     "labels": [],
     "merged": false,
-    "number": 44613,
+    "number": 44864,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[docs] tp training",
-    "updated_at": "2026-03-11T21:08:32Z"
-  },
-  {
-    "additions": 1,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The output function hook in `EmbeddingParallel` casts the mask to fp32. It breaks things for neuron devices. Suggested fix: cast to the outputs' dtype.",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44612",
-    "created_at": "2026-03-11T20:09:41Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44612/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44612",
-    "labels": [],
-    "merged": true,
-    "number": 44612,
-    "review_comments_count": 3,
     "state": "closed",
-    "title": "fix: cast to proper dtype in EmbeddingParallel",
-    "updated_at": "2026-03-12T21:08:04Z"
+    "title": "Switch FP8 per tensor quant to use `torch._scaled_mm`",
+    "updated_at": "2026-03-20T19:05:05Z"
   },
   {
-    "additions": 15,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 [This PR (\ud83d\udea8 Delete duplicate code in backbone utils)](https://github.com/huggingface/transformers/pull/43323) structured config loading to use [BackboneMi\u2026",
+    "additions": 19,
+    "author": "gh-wf",
+    "author_association": "NONE",
+    "body_excerpt": "Some models (e.g. Nemotron-H) define `_tied_weights_keys` as a list, which caused `AttributeError: 'list' object has no attribute 'keys'` when calling `save_pretrained` during full finetuning. # What does this PR do? `_get_tied_weight_keys\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44611",
-    "created_at": "2026-03-11T20:02:14Z",
-    "deletions": 3,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44862",
+    "created_at": "2026-03-19T15:14:12Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44611/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44611",
-    "labels": [],
-    "merged": true,
-    "number": 44611,
-    "review_comments_count": 12,
+    "files_url": "https://github.com/huggingface/transformers/pull/44862/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44862",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44862,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Forward timm model kwargs to timm.create_model for OmDet-Turbo",
-    "updated_at": "2026-03-13T11:57:20Z"
+    "title": "fix: handle list-type _tied_weights_keys in _get_tied_weight_keys",
+    "updated_at": "2026-03-20T09:47:09Z"
   },
   {
-    "additions": 0,
-    "author": "michaelbenayoun",
+    "additions": 11,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? By default, `initialize_tensor_parallelism` hides stdout and stderr for ranks > 0. While convenient, this is not perfect for dev and debugging. I suggest we simply add a flag to be able to disable this feature if wa\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. As discussed offline with the Mistral team, the scaling applied to the query should not be the absolute one (old `cache_position`), but the actual `position_ids`, taking into account padding, packe\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44608",
-    "created_at": "2026-03-11T18:57:01Z",
-    "deletions": 7,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44860",
+    "created_at": "2026-03-19T14:27:33Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44608/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44608",
+    "files_url": "https://github.com/huggingface/transformers/pull/44860/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44860",
     "labels": [],
     "merged": true,
-    "number": 44608,
-    "review_comments_count": 2,
+    "number": 44860,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow to disable stdout hiding for TP",
-    "updated_at": "2026-03-12T19:36:06Z"
+    "title": "[Mistral] Fix query scaling for Mistral4 and Ministral3",
+    "updated_at": "2026-03-19T18:02:06Z"
   },
   {
-    "additions": 9,
-    "author": "gabe-l-hart",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes generation for models using the `Idefics3ForConditionalGeneration` architecture with `use_cache=False`. ## Testing <details> <summary>docling_repro.py</summary> ```py import os import torch import time\u2026",
-    "changed_files": 3,
+    "additions": 7001,
+    "author": "philippguevorguian",
+    "author_association": "NONE",
+    "body_excerpt": null,
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44607",
-    "created_at": "2026-03-11T18:41:58Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44859",
+    "created_at": "2026-03-19T13:54:19Z",
+    "deletions": 138,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44607/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44607",
+    "files_url": "https://github.com/huggingface/transformers/pull/44859/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44859",
     "labels": [],
-    "merged": true,
-    "number": 44607,
+    "merged": false,
+    "number": 44859,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Idefics3 without cache fix",
-    "updated_at": "2026-03-16T15:23:34Z"
+    "title": "refactor: rope in model, flatten vision, rely on qwen3 backone, misc changes",
+    "updated_at": "2026-03-19T14:08:01Z"
   },
   {
-    "additions": 26,
-    "author": "itazap",
+    "additions": 111,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "In v5, we enforce creating a model-specific tokenizer (ex. LlamaTokenizer, Qwen2Tokenizer, et .) object when specified. 1. For instance, when `tokenizer_class` is set in `tokenization_config.json` 2. Or when using the auto_mapped `tokenize\u2026",
+    "body_excerpt": "Right now, the continuous batching tests all use similar mechanisms, namely: 1. loading a model and a tokenizer 2. preparing data for generate or generate_batch 3. running generate to compare its outputs with generate_batch This PR adds 3\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44606",
-    "created_at": "2026-03-11T17:29:12Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44858",
+    "created_at": "2026-03-19T13:22:04Z",
+    "deletions": 188,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44606/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44606",
+    "files_url": "https://github.com/huggingface/transformers/pull/44858/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44858",
     "labels": [],
-    "merged": false,
-    "number": 44606,
+    "merged": true,
+    "number": 44858,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "optionally override tokenizer class with serialized tokenizer ",
-    "updated_at": "2026-03-17T16:03:19Z"
+    "state": "closed",
+    "title": "[CB] [Minor] Simplify test suite",
+    "updated_at": "2026-03-24T11:44:39Z"
   },
   {
-    "additions": 540,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "* refactors DeepSpeed ZeRO doc: - moves the troubleshooting section to the more general Debugging guide to keep everything in one place - moves the sequence parallelism section into a new doc to give it more visibility - update to be more\u2026",
-    "changed_files": 4,
+    "additions": 3,
+    "author": "hkc5",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Python 3.13's stricter parser fails when there's a comment between the `@torch.jit.script` decorator and the function definition, causing an IndentationError when importing DebertaV2Model. ## Changes - Moved comments before the\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44605",
-    "created_at": "2026-03-11T17:26:12Z",
-    "deletions": 1176,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44856",
+    "created_at": "2026-03-19T12:33:00Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44605/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44605",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44856/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44856",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44605,
+    "number": 44856,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[docs] zero + sequence parallelism",
-    "updated_at": "2026-03-16T20:31:23Z"
+    "state": "closed",
+    "title": "fix: move comments before @torch.jit.script decorator for Python 3.13 compatibility",
+    "updated_at": "2026-03-19T13:11:44Z"
   },
   {
-    "additions": 415,
-    "author": "SunMarc",
+    "additions": 63,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fix the quantization CI : - [x] autoround - [x] bnb - [x] fp_quant_integration - [x] metal - [x] mxfp4 - [x] quark_integration - [x] torchao_integration One major point in this PR is that I bump the min vers\u2026",
-    "changed_files": 19,
+    "body_excerpt": "# What does this PR do? We had (flaky) ```bash tests/models/nemotron_h/test_modeling_nemotron_h.py::NemotronHModelTest::test_sdpa_can_compile_dynamic Fatal Python error: Segmentation fault ``` `NemotronHBlock.forward` creates a temporary `\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 33,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44604",
-    "created_at": "2026-03-11T16:44:38Z",
-    "deletions": 912,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44604/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44604",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44854",
+    "created_at": "2026-03-19T10:54:36Z",
+    "deletions": 56,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44854/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44854",
     "labels": [],
     "merged": true,
-    "number": 44604,
-    "review_comments_count": 0,
+    "number": 44854,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Bump torchao >=0.15 and fix quantization CI",
-    "updated_at": "2026-03-16T16:07:12Z"
+    "title": "Fix core dumped when `NemotronH` is torch compiled",
+    "updated_at": "2026-03-20T14:29:16Z"
   },
   {
-    "additions": 17,
-    "author": "michalrzak",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? A quick fix that updates the Dockerfile to run on `arm64` systems (such as the NVIDIA Spark). The previous version of the Dockerfile fails on `arm64` systems due to `SudachiPy`, which only provides wheels for `x86_6\u2026",
-    "changed_files": 1,
+    "additions": 99,
+    "author": "sergiopaniego",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? `Zamba2MambaMixer.__init__` calls `lazy_load_kernel(\"mamba-ssm\")` and `lazy_load_kernel(\"causal-conv1d\")` unconditionally. Models that inherit from it (like NemotronH) and set `use_mamba_kernels=False` in their conf\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44603",
-    "created_at": "2026-03-11T16:42:30Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44853",
+    "created_at": "2026-03-19T10:22:40Z",
+    "deletions": 72,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44603/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44603",
+    "files_url": "https://github.com/huggingface/transformers/pull/44853/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44853",
     "labels": [],
     "merged": false,
-    "number": 44603,
+    "number": 44853,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fixed dockerfile for arm64 systems",
-    "updated_at": "2026-03-11T16:42:30Z"
+    "title": "Fix Zamba2MambaMixer ignoring use_mamba_kernels=False",
+    "updated_at": "2026-03-31T06:17:20Z"
   },
   {
-    "additions": 218,
-    "author": "Cyrilvallez",
+    "additions": 117,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Follow up of https://github.com/huggingface/transformers/pull/44330 Also take the opportunity to simplify t5 and its children, because the way they compute`position_bias` was super convoluted/overc\u2026",
-    "changed_files": 61,
+    "body_excerpt": "An eos token can also be a list on most recent models, so this PR allows all `EOS` in config be a list as well. Same for q-lora-rank which apparently can be an explicit `None` for some model Also bring back `layer_type_validation` and add\u2026",
+    "changed_files": 92,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44602",
-    "created_at": "2026-03-11T16:19:43Z",
-    "deletions": 1083,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44851",
+    "created_at": "2026-03-19T09:53:31Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44602/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44602",
+    "files_url": "https://github.com/huggingface/transformers/pull/44851/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44851",
     "labels": [],
     "merged": true,
-    "number": 44602,
-    "review_comments_count": 14,
+    "number": 44851,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Remove `cache_position` in more models (2)",
-    "updated_at": "2026-03-12T22:38:15Z"
+    "title": "Update some type hints",
+    "updated_at": "2026-03-19T16:30:32Z"
   },
   {
-    "additions": 510,
-    "author": "ArthurZucker",
+    "additions": 5,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Goal is to be able to run a model with both PP and TP.",
-    "changed_files": 4,
+    "body_excerpt": "See #44458 This is a deep issue tbh - the cross attentions are reshaped into a different shape than the text input leading to a mismatch between batch sizes. This only gets noticed during compile as it is more strict about the concrete sha\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44601",
-    "created_at": "2026-03-11T15:56:51Z",
-    "deletions": 2,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44850",
+    "created_at": "2026-03-19T08:36:18Z",
+    "deletions": 6,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44601/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44601",
+    "files_url": "https://github.com/huggingface/transformers/pull/44850/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44850",
     "labels": [],
     "merged": false,
-    "number": 44601,
+    "number": 44850,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Distributed] Add PP support natively",
-    "updated_at": "2026-03-12T11:53:24Z"
+    "state": "closed",
+    "title": "[`Mllama`] Fix workaround compile",
+    "updated_at": "2026-03-26T13:02:24Z"
   },
   {
-    "additions": 0,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Small mistake in https://github.com/huggingface/transformers/pull/44432. cc @zucchini-nlp, was it intended to remove the scaling? (I assume so since the embedding now has the saling baked in, and I guess paligemma a\u2026",
-    "changed_files": 1,
+    "additions": 58,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Update Qwen3_Omni_Moe, to fix these attribute errors [Qwen3OmniModelIntegrationTests](https://github.com/huggingface/transformers/actions/runs/23230643883/job/67524756897#step:14:1131) <img width=\"2292\" height=\"161\"\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44600",
-    "created_at": "2026-03-11T15:40:16Z",
-    "deletions": 1,
+    "comments_count": 46,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44848",
+    "created_at": "2026-03-19T07:30:39Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44600/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44600",
+    "files_url": "https://github.com/huggingface/transformers/pull/44848/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44848",
     "labels": [],
     "merged": true,
-    "number": 44600,
-    "review_comments_count": 0,
+    "number": 44848,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Remove useless identity assignment",
-    "updated_at": "2026-03-12T10:21:23Z"
+    "title": "Fix few issues in Qwen_3_Omni_Moe",
+    "updated_at": "2026-03-30T16:43:00Z"
   },
   {
-    "additions": 3,
-    "author": "hf-security-analysis[bot]",
-    "author_association": "NONE",
-    "body_excerpt": "Update `.github/workflows/self-comment-ci.yml` workflow configuration. cc @vasqu Closes huggingface/tracking-issues#17",
+    "additions": 68,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Activated `anti-slop` action. Enabled checks: - `min-account-age: 30` to catch brand-new throwaway accounts, which are common in automated spam waves. - `max-daily-forks: 7` to catch accounts that fork many reposito\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44599",
-    "created_at": "2026-03-11T15:39:30Z",
-    "deletions": 161,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44847",
+    "created_at": "2026-03-19T07:15:38Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44599/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44599",
+    "files_url": "https://github.com/huggingface/transformers/pull/44847/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44847",
     "labels": [],
-    "merged": false,
-    "number": 44599,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "chore: update self-comment-ci.yml",
-    "updated_at": "2026-03-11T15:49:09Z"
+    "merged": true,
+    "number": 44847,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "ci: add anti-slop action",
+    "updated_at": "2026-03-26T13:39:58Z"
   },
   {
-    "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, updated to remove former members as well cc @yonigozlan @ydshieh",
+    "additions": 64,
+    "author": "RicardoLee510520",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? Updated the DeiT model card to follow the new standardized format: - Replaced verbose paper abstract with concise model description - Added Pipeline and AutoModel usage examples - Renamed \"Usage tips\" to \"Notes\" - U\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44598",
-    "created_at": "2026-03-11T15:13:25Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44846",
+    "created_at": "2026-03-19T06:30:53Z",
+    "deletions": 90,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44598/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44598",
+    "files_url": "https://github.com/huggingface/transformers/pull/44846/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44846",
     "labels": [],
-    "merged": true,
-    "number": 44598,
+    "merged": false,
+    "number": 44846,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add Yoni to run-slow workflow",
-    "updated_at": "2026-03-11T15:38:10Z"
+    "title": "[Docs] Update DeiT model card to new format",
+    "updated_at": "2026-03-20T05:30:17Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Our beautiful Dashboard is missing ..... damm",
-    "changed_files": 1,
+    "additions": 15,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `torch.compile` failure for Mllama after #42848 introduced a new unified attention mask creation path. The root cause is a **torch inductor C++ codegen bug**: when `padding_mask_function` uses advanced tensor\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44597",
-    "created_at": "2026-03-11T13:53:02Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44845",
+    "created_at": "2026-03-19T06:14:54Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44845/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44845",
+    "labels": [],
+    "merged": false,
+    "number": 44845,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Fix Mllama torch.compile failure caused by new attention mask logic",
+    "updated_at": "2026-03-26T13:01:50Z"
+  },
+  {
+    "additions": 8468,
+    "author": "sahilleth",
+    "author_association": "NONE",
+    "body_excerpt": "This PR makes a few small fixes on top of #37875 for the DEIM model: - Ensure `DeimConfig` / `DEIMConfig` and `DeimModel` / `DeimForObjectDetection` are correctly exposed from the `transformers` package. - Fix a configuration docstring lin\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44844",
+    "created_at": "2026-03-19T05:50:29Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44597/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44597",
+    "files_url": "https://github.com/huggingface/transformers/pull/44844/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44844",
     "labels": [],
-    "merged": true,
-    "number": 44597,
+    "merged": false,
+    "number": 44844,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CircleCI summary report not showing due to missing dependency",
-    "updated_at": "2026-03-20T07:33:38Z"
+    "title": "Fix DEIM config export and public API",
+    "updated_at": "2026-03-19T13:18:59Z"
   },
   {
     "additions": 26,
-    "author": "Desel72",
+    "author": "omyaaa1",
     "author_association": "NONE",
-    "body_excerpt": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype # What does this PR do? When loading FP8 models (e.g. `Qwen/Qwen3.5-35B-A3B-FP8`) with `dtype=\"auto\"`, the auto-detected dtype from checkpoint weight\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Reintroduce handling for remote URLs using download_url, which was accidentally removed in recent versions. This restores support for loading image processor configs directly from URLs. Fixes #44821 # What does this PR do? <!-- Congratulat\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44596",
-    "created_at": "2026-03-11T13:03:19Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44842",
+    "created_at": "2026-03-19T04:48:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44596/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44596",
+    "files_url": "https://github.com/huggingface/transformers/pull/44842/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44842",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44596,
+    "number": 44842,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype",
-    "updated_at": "2026-03-11T14:00:39Z"
+    "title": "Fix AutoImageProcessor URL loading regression",
+    "updated_at": "2026-03-19T11:57:50Z"
   },
   {
-    "additions": 2324,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add [CHMv2 ](https://arxiv.org/abs/2603.06382) to Transformers",
-    "changed_files": 23,
+    "additions": 1,
+    "author": "zhulinchng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix formatting of code block in weightconverter.md # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44595",
-    "created_at": "2026-03-11T12:38:44Z",
-    "deletions": 23,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44839",
+    "created_at": "2026-03-19T01:40:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44595/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44595",
-    "labels": [
-      "New model",
-      "run-slow"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44839/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44839",
+    "labels": [],
     "merged": true,
-    "number": 44595,
-    "review_comments_count": 30,
+    "number": 44839,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add CHMv2",
-    "updated_at": "2026-03-11T16:00:03Z"
+    "title": "Correct code block formatting in weightconverter.md",
+    "updated_at": "2026-03-19T07:07:13Z"
   },
   {
-    "additions": 271,
-    "author": "vimal-crypto",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What this PR does This PR brings `ObjectDetectionPipeline` in line with its sister pipelines (`ZeroShotObjectDetectionPipeline`, `ImageClassificationPipeline`) by adding four enhancements to the postprocessing stage. ### Changes **1. Sc\u2026",
+    "additions": 41,
+    "author": "xr843",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes #44821 \u2014 `AutoImageProcessor.from_pretrained` fails with `OSError: Repo id must be in the form 'repo_name' or 'namespace/repo_name'` when given a URL - The URL handling branch (`is_remote_url` check) in `get_image_proces\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44594",
-    "created_at": "2026-03-11T12:37:46Z",
-    "deletions": 40,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44838",
+    "created_at": "2026-03-18T23:53:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44594/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44594",
+    "files_url": "https://github.com/huggingface/transformers/pull/44838/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44838",
     "labels": [],
     "merged": false,
-    "number": 44594,
+    "number": 44838,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Pipeline] Add top_k, label filtering, box_format and score sorting to ObjectDetectionPipeline",
-    "updated_at": "2026-03-11T12:37:46Z"
+    "state": "closed",
+    "title": "Fix AutoImageProcessor.from_pretrained failing with URL input",
+    "updated_at": "2026-03-19T10:43:53Z"
   },
   {
-    "additions": 15,
-    "author": "BenjaminBossan",
+    "additions": 482,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Multiple PEFT tests are failing due to recent changes in transformers. - hf_device_map attribute may not exist in some cases - respect inference_mode in load_adapter - new model loading requires changes for bnb (SCB\u2026",
-    "changed_files": 3,
+    "body_excerpt": "backfills empty model cards like gptoss and nemotronh with a `model-card.md` skill i created. its pretty minimal at the moment and just includes a brief intro and code examples. let me know if there is anything else we should add!",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44592",
-    "created_at": "2026-03-11T10:41:51Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44837",
+    "created_at": "2026-03-18T21:45:31Z",
+    "deletions": 102,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44592/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44592",
+    "files_url": "https://github.com/huggingface/transformers/pull/44837/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44837",
     "labels": [],
     "merged": true,
-    "number": 44592,
+    "number": 44837,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "FIX Multiple PEFT errors after v5 transition",
-    "updated_at": "2026-03-11T12:24:05Z"
+    "title": "[docs] model cards",
+    "updated_at": "2026-03-20T22:40:41Z"
   },
   {
-    "additions": 60,
-    "author": "moktamd",
-    "author_association": "NONE",
-    "body_excerpt": "Adds `_apply_mps_fixes` in `sdpa_attention.py` to handle two upstream PyTorch MPS bugs: 1. **pytorch/pytorch#176767** (fixed in PyTorch 2.12): pads value tensor when `v_head_dim != q_head_dim` to avoid corrupted output. Affects DeepSeek mo\u2026",
-    "changed_files": 1,
+    "additions": 96,
+    "author": "tyler-romero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Pass cu_seqlens derived from packed attention masks to FLA's ShortConvolution and chunk_gated_delta_rule kernels, preventing recurrent state from leaking across sequence boundaries during packed-sequence training. F\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44591",
-    "created_at": "2026-03-11T10:32:26Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44591/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44591",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44836",
+    "created_at": "2026-03-18T20:24:58Z",
+    "deletions": 20,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44836/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44836",
     "labels": [],
     "merged": false,
-    "number": 44591,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add MPS SDPA workarounds for value head dim and bidirectional attention",
-    "updated_at": "2026-03-11T13:37:15Z"
+    "number": 44836,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Add cu_seqlens support to OlmoHybridGatedDeltaNet for packed sequences",
+    "updated_at": "2026-03-19T05:34:43Z"
   },
   {
-    "additions": 2,
-    "author": "pranay-3108",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes incorrect documentation for `position_ids` in `masking_utils.py`. The docstring previously described `position_ids` as `torch.Tensor`. This PR updates it to `torch.LongTensor` and aligns the description with the standard wording used\u2026",
-    "changed_files": 1,
+    "additions": 187,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary This PR adds the `return_logprobs` flag to the continuous batching, enabling the user to retrieve the log probabilites of the tokens generated. # Tests Added a test to compare with regular generate and it passes. All tests pass.\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44590",
-    "created_at": "2026-03-11T05:13:57Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44835",
+    "created_at": "2026-03-18T17:48:15Z",
+    "deletions": 83,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44590/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44590",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44590,
+    "files_url": "https://github.com/huggingface/transformers/pull/44835/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44835",
+    "labels": [],
+    "merged": true,
+    "number": 44835,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix incorrect docstring for position_ids",
-    "updated_at": "2026-03-11T21:08:42Z"
+    "title": "[CB] Add an option to return logprobs",
+    "updated_at": "2026-03-23T18:35:31Z"
   },
   {
-    "additions": 1,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes Llama4 model loading under BitsAndBytes (BNB) quantization mode. Router quantized incorrectly causes shape mismatch: Llama4Router inherits from nn.Linear, so BNB quantizes its weight into a packed format. However, super().forward() c\u2026",
-    "changed_files": 1,
+    "additions": 0,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Oups this slipped through in https://github.com/huggingface/transformers/pull/44833",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44588",
-    "created_at": "2026-03-11T01:42:33Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44834",
+    "created_at": "2026-03-18T17:07:11Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44588/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44588",
+    "files_url": "https://github.com/huggingface/transformers/pull/44834/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44834",
     "labels": [],
-    "merged": false,
-    "number": 44588,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Fix llama4 bnb mode",
-    "updated_at": "2026-03-25T07:49:10Z"
+    "merged": true,
+    "number": 44834,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Update more modular examples",
+    "updated_at": "2026-03-18T17:18:54Z"
   },
   {
-    "additions": 32,
-    "author": "kmbhattt-aws",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 299,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix the examples after the config change (https://github.com/huggingface/transformers/pull/41250), and re-run conversion as in general modelings changed quite a bit in the lib.",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44587",
-    "created_at": "2026-03-11T01:01:18Z",
-    "deletions": 20,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44587/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44587",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44833",
+    "created_at": "2026-03-18T16:35:34Z",
+    "deletions": 590,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44833/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44833",
     "labels": [],
-    "merged": false,
-    "number": 44587,
+    "merged": true,
+    "number": 44833,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: Handling fused qkv result tensor slicing for tp sharded qkv weights",
-    "updated_at": "2026-03-12T21:31:29Z"
+    "state": "closed",
+    "title": "Fix and re-run modular converter on examples",
+    "updated_at": "2026-03-18T17:00:44Z"
   },
   {
-    "additions": 91,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Decouples router logits collection from output visibility in Mixtral's `ForCausalLM`. Previously, `output_router_logits=False` (the default) prevented `aux_loss` from being computed, meaning load balancing was silen\u2026",
-    "changed_files": 13,
+    "additions": 346,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44586",
-    "created_at": "2026-03-11T00:24:07Z",
-    "deletions": 39,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44832",
+    "created_at": "2026-03-18T15:33:15Z",
+    "deletions": 176,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44586/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44586",
+    "files_url": "https://github.com/huggingface/transformers/pull/44832/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44832",
     "labels": [],
-    "merged": false,
-    "number": 44586,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44832,
+    "review_comments_count": 37,
     "state": "closed",
-    "title": "Fix Mixtral aux_loss not computed when output_router_logits=False",
-    "updated_at": "2026-03-11T14:31:21Z"
+    "title": "DeepGEMM",
+    "updated_at": "2026-03-31T15:04:06Z"
   },
   {
-    "additions": 10,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Passes `eps=config.rms_norm_eps` to both `q_a_layernorm` and `kv_a_layernorm` in the DeepseekV3 MLA attention module. Without this, these layernorms default to `eps=1e-5` instead of the config value (`1e-6`), causin\u2026",
-    "changed_files": 5,
+    "additions": 2,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix loading in Sam3 which currently doesn't match the state dict keys from checkpoint. Adding a correct base model prefix will add it to all state dict keys, making the ckpt load-able <!-- Congratulations! You've ma\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44585",
-    "created_at": "2026-03-11T00:20:54Z",
-    "deletions": 10,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44585/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44585",
-    "labels": [],
-    "merged": false,
-    "number": 44585,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix missing rms_norm_eps in DeepseekV3 MLA layernorms",
-    "updated_at": "2026-03-12T14:39:12Z"
-  },
-  {
-    "additions": 1,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes an off-by-one error in `decode_spans()` in the document question answering pipeline that causes a `ValueError: kth(=N) out of bounds` crash when `len(scores_flat) == topk`. The boundary check on line 97 uses `\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44584",
-    "created_at": "2026-03-10T23:52:51Z",
-    "deletions": 1,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44831",
+    "created_at": "2026-03-18T14:50:48Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44584/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44584",
+    "files_url": "https://github.com/huggingface/transformers/pull/44831/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44831",
     "labels": [],
     "merged": true,
-    "number": 44584,
+    "number": 44831,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix off-by-one in decode_spans boundary check",
-    "updated_at": "2026-03-12T13:22:10Z"
+    "title": "Fix loading issue in Sam3",
+    "updated_at": "2026-03-18T15:44:01Z"
   },
   {
-    "additions": 45,
-    "author": "wilnn",
+    "additions": 2070,
+    "author": "lashahub",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026kpoint when `save_strategy` is `best` # What does this PR do? fix load_best_model_checkpoint_at_end do not load the best model checkpoint at the end when `save_strategy` is `\"best\"` Fixes # (issue) fix load_best_model_checkpoint_at_end do\u2026",
-    "changed_files": 5,
+    "body_excerpt": "This PR adds `AudioFlamingoNext` as a separate model name that inherits directly from `MusicFlamingo` #43538 and keeps the same architecture and behavior. Changes: - add `audioflamingonext` model files - register it in the auto mappings -\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44583",
-    "created_at": "2026-03-10T22:37:36Z",
-    "deletions": 16,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44583/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44583",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44830",
+    "created_at": "2026-03-18T14:31:45Z",
+    "deletions": 6,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44830/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44830",
     "labels": [],
-    "merged": true,
-    "number": 44583,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "fix load_best_model_checkpoint_at_end do not load the best model chec\u2026",
-    "updated_at": "2026-03-24T15:42:26Z"
+    "merged": false,
+    "number": 44830,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Add AudioFlamingoNext model",
+    "updated_at": "2026-04-06T02:08:22Z"
   },
   {
-    "additions": 3,
-    "author": "yonigozlan",
+    "additions": 101,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix an issue introduced in #42564 . The refactor embedded raw image tokens instead of BPE tokens, causing the model to output gibberish. This fix adds back the image tokens to BPE tokens conversion before embedding.\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Last batch finally! Follow up of https://github.com/huggingface/transformers/pull/44759 and many other",
+    "changed_files": 33,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44582",
-    "created_at": "2026-03-10T21:00:30Z",
-    "deletions": 1,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44828",
+    "created_at": "2026-03-18T13:52:32Z",
+    "deletions": 512,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44582/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44582",
+    "files_url": "https://github.com/huggingface/transformers/pull/44828/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44828",
     "labels": [],
     "merged": true,
-    "number": 44582,
+    "number": 44828,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix missing BPE token conversion step in Chameleon",
-    "updated_at": "2026-03-11T11:26:49Z"
+    "title": "Remove cache_position in more models (4 and last one)",
+    "updated_at": "2026-03-18T16:02:46Z"
   },
   {
-    "additions": 9,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 **Llama-4 Vision:** [freqs_ci is stored as a plain attr](https://github.com/huggingface/transformers/blob/153894c013/src/transformers/models/llama4/mode\u2026",
-    "changed_files": 2,
+    "additions": 80,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "https://github.com/huggingface/transformers/pull/44825",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44581",
-    "created_at": "2026-03-10T19:33:51Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44827",
+    "created_at": "2026-03-18T13:36:53Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44581/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44581",
+    "files_url": "https://github.com/huggingface/transformers/pull/44827/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44827",
     "labels": [],
-    "merged": true,
-    "number": 44581,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "fix(models, testing): Fix Llama4 vision rotary meta tensor initialization and MyT5 get_tokenizer signature",
-    "updated_at": "2026-03-13T16:13:09Z"
+    "merged": false,
+    "number": 44827,
+    "review_comments_count": 6,
+    "state": "open",
+    "title": "Fix Mistral4 tests",
+    "updated_at": "2026-03-26T16:04:20Z"
   },
   {
-    "additions": 16,
-    "author": "rabbierabbie",
-    "author_association": "NONE",
-    "body_excerpt": "This PR clarifies references to the **Transformers library** in the README. While reading the documentation as a new user, I initially found the term **\"Transformers\"** ambiguous because it could refer either to the **Transformer architect\u2026",
-    "changed_files": 1,
+    "additions": 55,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44821 This PR fixes the issue where `AutoImageProcessor.from_pretrained()` was unable to load from a URL (e.g., `https://huggingface.co/.../raw/main/config.json`). The bug was introduced in transformers>=5.3.0. Prior versions (e.g.,\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44580",
-    "created_at": "2026-03-10T18:21:55Z",
-    "deletions": 16,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44826",
+    "created_at": "2026-03-18T12:08:35Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44580/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44580",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44826/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44826",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44580,
+    "number": 44826,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Clarify references to the Transformers library in README",
-    "updated_at": "2026-03-11T13:24:58Z"
+    "title": "fix: allow AutoImageProcessor to load from URL",
+    "updated_at": "2026-03-19T13:28:04Z"
   },
   {
     "additions": 1,
-    "author": "ydshieh",
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We need to fetch the specific commit (the so called merge commit created by Github itself)",
+    "body_excerpt": "# What does this PR do? As per the title. cc @3outeille as I know you're looking into it",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44579",
-    "created_at": "2026-03-10T16:58:09Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44825",
+    "created_at": "2026-03-18T12:05:35Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44579/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44579",
+    "files_url": "https://github.com/huggingface/transformers/pull/44825/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44825",
     "labels": [],
     "merged": true,
-    "number": 44579,
+    "number": 44825,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix PR comment CI for quantization job",
-    "updated_at": "2026-03-10T17:07:11Z"
+    "title": "[CI] Temporarily skip Mistral4 tests as they almost all fail",
+    "updated_at": "2026-03-18T12:15:34Z"
   },
   {
-    "additions": 449,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds three attributes to the compile config, to have granularity over how varlen (handles mixed prefil and decode batches) and decode (only decode batches) are compiled. We want to have this kind of granularity because va\u2026",
-    "changed_files": 11,
+    "additions": 5,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Fixes #44737 The `relative_positional_encoding` function in XLNet was computing all positional encodings on CPU every forward pass because the `torch.arange` calls were missing the `device` parameter. ## Changes - Added devi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44578",
-    "created_at": "2026-03-10T16:31:20Z",
-    "deletions": 121,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44824",
+    "created_at": "2026-03-18T11:55:01Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44578/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44578",
-    "labels": [],
-    "merged": true,
-    "number": 44578,
-    "review_comments_count": 4,
+    "files_url": "https://github.com/huggingface/transformers/pull/44824/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44824",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44824,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Better parametrization for compile",
-    "updated_at": "2026-03-19T11:50:08Z"
+    "title": "fix(xlnet): add device parameter to relative_positional_encoding",
+    "updated_at": "2026-03-18T13:17:38Z"
   },
   {
-    "additions": 5,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "- Fix quantizer_aqlm.py to use renamed modules_to_not_convert parameter instead of removed linear_weights_not_to_quantize - Update test to match new function signature: no tuple return, module names instead of weight names",
+    "additions": 41,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes #44821 where `AutoImageProcessor.from_pretrained()` couldn't load from a direct URL to a config file. ## Problem When passing a URL like `https://huggingface.co/jinfengxie/BFMS_1014/raw/main/config.json` to `AutoImageProcesso\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44577",
-    "created_at": "2026-03-10T15:57:36Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44823",
+    "created_at": "2026-03-18T11:54:24Z",
     "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44577/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44577",
-    "labels": [],
-    "merged": true,
-    "number": 44577,
+    "files_url": "https://github.com/huggingface/transformers/pull/44823/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44823",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44823,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: AQLM quantizer to match updated replace_with_aqlm_linear signature",
-    "updated_at": "2026-03-10T17:48:00Z"
+    "title": "fix: AutoImageProcessor from URL loading",
+    "updated_at": "2026-03-18T13:17:48Z"
   },
   {
-    "additions": 16,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44387. This PR disable async loading when we want to quantize the model. it is actually faster than doing a semaphore. If a quantizer happens to quantize fast\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
+    "additions": 4,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44737 The relative_positional_encoding method in XLNetModel was creating tensors using torch.arange() without specifying device=, causing the entire sinusoidal positional encoding computation to run on CPU every forward pass. Only t\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44576",
-    "created_at": "2026-03-10T15:07:01Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44822",
+    "created_at": "2026-03-18T11:48:28Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44576/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44576",
-    "labels": [],
-    "merged": true,
-    "number": 44576,
-    "review_comments_count": 1,
+    "files_url": "https://github.com/huggingface/transformers/pull/44822/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44822",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44822,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Disable async loading when quantizing on the fly",
-    "updated_at": "2026-03-16T16:36:42Z"
+    "title": "fix: XLNet relative_positional_encoding device placement",
+    "updated_at": "2026-03-18T13:17:30Z"
   },
   {
-    "additions": 13,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some parameters in Tapas are initialized in `__init__()` and not reinitialized in `_init_weights()`, which means that if the model is created on the `meta` device, those parameters do not get a weight initialization. This causes a crash la\u2026",
+    "additions": 14,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44811 ## Problem When calling `processor.batch_decode(predicted_ids, skip_special_tokens=False)` with the output from `model.generate()` (without `return_dict_in_generate=True`), the `skip_special_tokens` parameter was being ignored\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44575",
-    "created_at": "2026-03-10T14:42:40Z",
-    "deletions": 16,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44820",
+    "created_at": "2026-03-18T10:57:12Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44575/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44575",
-    "labels": [],
-    "merged": true,
-    "number": 44575,
+    "files_url": "https://github.com/huggingface/transformers/pull/44820/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44820",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44820,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Correct Tapas initialization",
-    "updated_at": "2026-03-10T15:14:37Z"
+    "title": "fix(whisper): respect skip_special_tokens in batch_decode",
+    "updated_at": "2026-03-18T13:17:20Z"
   },
   {
-    "additions": 33,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes #44448",
-    "changed_files": 4,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44571",
-    "created_at": "2026-03-10T14:28:22Z",
-    "deletions": 12,
+    "additions": 2,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description This PR fixes the DeepSeek tokenizer issue (#44779) where tokenization produces incorrect results in v5. ### Problem In transformers v5, the DeepSeek tokenizer (DeepSeek-R1) was producing incorrect results: - Input: \"How are\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44819",
+    "created_at": "2026-03-18T10:55:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44571/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44571",
-    "labels": [],
-    "merged": true,
-    "number": 44571,
+    "files_url": "https://github.com/huggingface/transformers/pull/44819/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44819",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44819,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix pegasus conversion",
-    "updated_at": "2026-03-18T09:55:00Z"
+    "title": "fix(tokenizer): add deepseek_v2 and deepseek_v3 to incorrect hub tokenizer class list",
+    "updated_at": "2026-03-18T14:11:16Z"
   },
   {
-    "additions": 30,
-    "author": "umbilnm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? In transformers v5, `DebertaV2Tokenizer` was rewritten to use `TokenizersBackend`, but the `post_processor` responsible for adding `[CLS]`/`[SEP]` tokens was never set. This causes `add_special_tokens=True` to silen\u2026",
-    "changed_files": 2,
+    "additions": 64,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Fixes #44805 When training multimodal models (Qwen3-VL, GLM-4.6V, Qwen3-VL-MoE) with LoRA adapters, the `attention_mask` and `mm_token_type_ids` tensors can have different shapes. This causes an IndexError when the `get_rope\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44570",
-    "created_at": "2026-03-10T13:37:06Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44818",
+    "created_at": "2026-03-18T10:46:22Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44570/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44570",
-    "labels": [],
-    "merged": true,
-    "number": 44570,
-    "review_comments_count": 1,
+    "files_url": "https://github.com/huggingface/transformers/pull/44818/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44818",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44818,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix missing post_processor in DebertaV2Tokenizer causing no special t\u2026",
-    "updated_at": "2026-03-24T09:40:44Z"
+    "title": "fix: resolve mask shape mismatch IndexError in multimodal VL models",
+    "updated_at": "2026-03-18T10:51:43Z"
   },
   {
-    "additions": 267,
-    "author": "aashay-sarvam",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds native support for the `sarvam_mla` model type (`sarvamai/sarvam-105b`) to HuggingFace Transformers using the **modular pattern**, inheriting from DeepSeek V3. ### Model Architecture SarvamMLA is a **105B para\u2026",
-    "changed_files": 10,
+    "additions": 28,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/97. This PR adds `enable_thinking` to the chat-template kwargs. With this change, `enable_thinking` is treated as a template-level argument in the tokenize=True path, so\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44569",
-    "created_at": "2026-03-10T11:55:01Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44817",
+    "created_at": "2026-03-18T10:44:11Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44569/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44569",
+    "files_url": "https://github.com/huggingface/transformers/pull/44817/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44817",
     "labels": [],
     "merged": false,
-    "number": 44569,
-    "review_comments_count": 20,
-    "state": "open",
-    "title": "Add SarvamMLA model (sarvamai/sarvam-105b)",
-    "updated_at": "2026-03-18T13:58:18Z"
+    "number": 44817,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Misc] add enable_thinking to template kwargs",
+    "updated_at": "2026-03-20T14:56:04Z"
   },
   {
-    "additions": 2,
-    "author": "tomaarsen",
+    "additions": 98,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Currently, when using Apertus (or rather, it's `XIELUActivation`), and you don't have `xielu` installed, then you'll fall to this `except:` https://github.com/huggingface/transformers/blob/5a098a1e01034095f037c8a37f\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. This is the last of many PR to remove the `cache_position`. At this point, all the models were already updated to not use them, and they are fully ignored in all the modelings. So this removes thei\u2026",
+    "changed_files": 57,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44567",
-    "created_at": "2026-03-10T11:39:20Z",
-    "deletions": 3,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44816",
+    "created_at": "2026-03-18T10:32:04Z",
+    "deletions": 375,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44567/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44567",
+    "files_url": "https://github.com/huggingface/transformers/pull/44816/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44816",
     "labels": [],
     "merged": true,
-    "number": 44567,
-    "review_comments_count": 0,
+    "number": 44816,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "[`fix`] Prevent crash with Apertus without xielu installed",
-    "updated_at": "2026-03-10T13:24:11Z"
+    "title": "[generate] Never use `cache_position` anymore in generation",
+    "updated_at": "2026-03-19T14:18:28Z"
   },
   {
-    "additions": 188,
-    "author": "tarekziade",
+    "additions": 135,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "This patch extends `ty` check to `src/transformers/cli` Based on https://github.com/huggingface/transformers/pull/44412",
-    "changed_files": 10,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44566",
-    "created_at": "2026-03-10T10:40:13Z",
-    "deletions": 101,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44566/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44566",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44815",
+    "created_at": "2026-03-18T09:54:18Z",
+    "deletions": 23,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44815/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44815",
     "labels": [],
     "merged": false,
-    "number": 44566,
-    "review_comments_count": 21,
+    "number": 44815,
+    "review_comments_count": 2,
     "state": "open",
-    "title": "chore(typing): extend typing to `src/transformers/cli` ",
-    "updated_at": "2026-03-20T08:12:52Z"
+    "title": "Dequant fix",
+    "updated_at": "2026-03-24T14:39:52Z"
   },
   {
-    "additions": 36,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. The unused memory is taken into account too late, which leads to different device_map for the same hardware and models, and even random cuda OOM!! Basically, the max memory needs to be adjusted BEF\u2026",
+    "additions": 2,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #44792 This PR fixes the failing test `test_model_generate_images` for the Janus model. ## Problem When generating images with the Janus model, `generation_config.num_return_sequences` and `generation_config.max_length` can be `None`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44565",
-    "created_at": "2026-03-10T10:31:10Z",
-    "deletions": 22,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44814",
+    "created_at": "2026-03-18T09:51:34Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44565/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44565",
+    "files_url": "https://github.com/huggingface/transformers/pull/44814/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44814",
     "labels": [],
-    "merged": true,
-    "number": 44565,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44814,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[device_map] Fix device_map computation by correctly adjusting memory available",
-    "updated_at": "2026-03-10T17:16:01Z"
+    "title": "fix(janus): handle None values in image generation mode",
+    "updated_at": "2026-03-18T10:42:50Z"
   },
   {
-    "additions": 2,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #44360",
-    "changed_files": 2,
+    "additions": 20,
+    "author": "hf-security-analysis[bot]",
+    "author_association": "NONE",
+    "body_excerpt": "Update `.github/workflows/pr-repo-consistency-bot.yml` workflow configuration. cc @ydshieh Closes huggingface/tracking-issues#26",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44564",
-    "created_at": "2026-03-10T10:14:21Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44813",
+    "created_at": "2026-03-18T09:49:16Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44564/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44564",
+    "files_url": "https://github.com/huggingface/transformers/pull/44813/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44813",
     "labels": [],
-    "merged": true,
-    "number": 44564,
+    "merged": false,
+    "number": 44813,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix glm dsa",
-    "updated_at": "2026-03-19T15:13:36Z"
+    "title": "chore: update pr-repo-consistency-bot.yml",
+    "updated_at": "2026-03-18T10:08:04Z"
   },
   {
-    "additions": 165,
-    "author": "zucchini-nlp",
+    "additions": 7,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Split out `mm_token_type_id` creation to a separate utility and just call it in VLMs. Also make sure that `mm_token_type_id` can be created even when `padding=False` and the inputs are of different length. As long a\u2026",
-    "changed_files": 37,
+    "body_excerpt": "# What does this PR do? Some checks (for example, modular checks) really require the installation from PR branch.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44563",
-    "created_at": "2026-03-10T10:13:29Z",
-    "deletions": 267,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44812",
+    "created_at": "2026-03-18T09:40:36Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44563/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44563",
+    "files_url": "https://github.com/huggingface/transformers/pull/44812/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44812",
     "labels": [],
     "merged": true,
-    "number": 44563,
-    "review_comments_count": 4,
+    "number": 44812,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Allow `mm_token_type` be non-padded lists ",
-    "updated_at": "2026-03-25T11:33:46Z"
+    "title": "Fix repo-check bot",
+    "updated_at": "2026-03-18T09:49:50Z"
   },
   {
-    "additions": 54,
-    "author": "SamArun28",
-    "author_association": "NONE",
-    "body_excerpt": "#Standardizing the BERT model card as part of issue #36979 Changes made: - Added friendly description of BERT - Added Pipeline and AutoModel code examples - Added Notes section with helpful tips - Added Resources section with links @stevhl\u2026",
-    "changed_files": 1,
+    "additions": 7,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed bare except clause in _safe_convert_tensor function to catch only Exception type.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44562",
-    "created_at": "2026-03-10T09:58:19Z",
-    "deletions": 111,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44809",
+    "created_at": "2026-03-18T05:47:03Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44562/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44562",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44809/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44809",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44562,
+    "number": 44809,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: standardize BERT model card",
-    "updated_at": "2026-03-10T16:00:43Z"
+    "title": "fix: replace bare except with Exception in Fuyu image processing",
+    "updated_at": "2026-03-18T13:17:11Z"
   },
   {
-    "additions": 368,
-    "author": "rain-1",
+    "additions": 2759,
+    "author": "zhang-prog",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 31,
+    "cluster_id": "cluster-43098-11",
+    "cluster_ids": [
+      "cluster-43098-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44808",
+    "created_at": "2026-03-18T04:29:07Z",
+    "deletions": 21,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44808/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44808",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 44808,
+    "review_comments_count": 53,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRv5_server_rec and  PP-OCRv5_mobile_rec models Support",
+    "updated_at": "2026-03-18T20:24:50Z"
+  },
+  {
+    "additions": 0,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "> Adds support for the legacy text completions endpoint, which accepts a freeform text prompt (no chat template) and returns generated text in choices[].text. Supports both streaming and non-streaming modes, suffix for fill-in-the-middle i\u2026",
+    "body_excerpt": "The question-answering pipeline was removed in v5.3 per the migration guide, but the Chinese, Korean, and French quicktour docs still listed it as an available pipeline task. This removes those outdated references to avoid confusing users\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44558",
-    "created_at": "2026-03-10T07:09:07Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44807",
+    "created_at": "2026-03-18T03:41:52Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44558/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44558",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44807/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44807",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44558,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Add /v1/completions endpoint (OpenAI legacy completions API) to `transformers serve`",
-    "updated_at": "2026-03-23T07:30:41Z"
+    "number": 44807,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs(quicktour): remove question-answering pipeline from quicktour tables",
+    "updated_at": "2026-03-18T15:38:09Z"
   },
   {
-    "additions": 1,
-    "author": "black-yt",
-    "author_association": "NONE",
-    "body_excerpt": "This PR fixes a runtime `TypeError` encountered during model initialization when using Qwen3.5 configurations with recent `transformers` versions. The error occurs in `modeling_rope_utils.py` during RoPE parameter validation: ``` TypeError\u2026",
+    "additions": 23,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes OOM errors when loading models on AMD Strix Halo APUs. ## Problem AMD Strix Halo (Radeon 8060S/8050S) uses unified memory architecture where memory-mapped file loading doesn't work well with the current amdgpu driver. This ca\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44555",
-    "created_at": "2026-03-10T01:52:18Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44806",
+    "created_at": "2026-03-18T03:33:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44555/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44555",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44806/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44806",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44555,
+    "number": 44806,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in RoPE validation when ignore_keys_at_rope_validation is a list",
-    "updated_at": "2026-03-12T13:16:57Z"
+    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-18T12:30:21Z"
   },
   {
-    "additions": 233,
-    "author": "vasqu",
+    "additions": 137,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, wip and really unsure if we really want this",
-    "changed_files": 16,
+    "body_excerpt": "updates the peft docs: - a more complete training section with a full code snippet, describe saving behavior, resuming from a checkpoint, and distributed training - adds some undocumented API methods (`delete_adapter`, `active_adapters`) -\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44553",
-    "created_at": "2026-03-10T01:04:49Z",
-    "deletions": 263,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44553/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44553",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44804",
+    "created_at": "2026-03-18T00:08:54Z",
+    "deletions": 89,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44804/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44804",
     "labels": [],
-    "merged": false,
-    "number": 44553,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "[`FA`] Refactor FA CB kwargs",
-    "updated_at": "2026-03-17T09:14:21Z"
+    "merged": true,
+    "number": 44804,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "[docs] peft",
+    "updated_at": "2026-03-23T17:14:58Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
+    "additions": 1341,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "Reverts huggingface/transformers#44529",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? (Finally) add support for checking+fixing both generated files and modular files in `check_auto_docstrings`. Also `auto_docstring` was recently added to configs, and this PR updates `check_auto_docstrings` to suppor\u2026",
+    "changed_files": 244,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44552",
-    "created_at": "2026-03-09T21:05:46Z",
-    "deletions": 3,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44803",
+    "created_at": "2026-03-17T22:40:45Z",
+    "deletions": 1105,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44552/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44552",
+    "files_url": "https://github.com/huggingface/transformers/pull/44803/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44803",
     "labels": [],
     "merged": true,
-    "number": 44552,
-    "review_comments_count": 0,
+    "number": 44803,
+    "review_comments_count": 24,
     "state": "closed",
-    "title": "Revert \"test merge queue 1\"",
-    "updated_at": "2026-03-09T21:15:55Z"
+    "title": "Support Modular (!!) + Configs in `check_auto_docstrings`",
+    "updated_at": "2026-03-24T17:59:12Z"
   },
   {
     "additions": 12,
-    "author": "echarlaix",
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "`self.rotary_emb` is always called since https://github.com/huggingface/transformers/pull/39847 while only being initialized when `config.use_mem_rope` is True inference failing since v5 for models `config.use_mem_rope=False` ``` Attribute\u2026",
+    "body_excerpt": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub fixes: https://github.com/huggingface/transformers/issues/44779, https://github.com/huggingface/transformers/pull/44783",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44551",
-    "created_at": "2026-03-09T18:15:17Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44801",
+    "created_at": "2026-03-17T17:40:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44551/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44551",
+    "files_url": "https://github.com/huggingface/transformers/pull/44801/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44801",
     "labels": [],
     "merged": true,
-    "number": 44551,
+    "number": 44801,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix zamba2 rotary embedding call when use_mem_rope is False",
-    "updated_at": "2026-03-10T11:43:36Z"
+    "title": "deepseek_v2, deepseek_v3, and modernbert fix for having incorrect tokenizer class on the hub",
+    "updated_at": "2026-03-19T13:11:54Z"
   },
   {
-    "additions": 6,
-    "author": "himani27301",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Improved wording and grammar in the Auto Classes documentation to enhance readability without changing functionality. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, you\u2026",
-    "changed_files": 1,
+    "additions": 36,
+    "author": "aayushbaluni",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44488 `CamembertTokenizer` raised `ValueError: too many values to unpack (expected 2)` when loading models like `cjvt/sleng-bert` that provide vocab as a dict `{token: id}` from `tokenizer.json` (BPE format). The tokenize\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44550",
-    "created_at": "2026-03-09T16:12:59Z",
-    "deletions": 16,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44800",
+    "created_at": "2026-03-17T17:20:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44550/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44550",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44800/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44800",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44550,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Improve clarity and grammar in Auto Classes documentation",
-    "updated_at": "2026-03-09T16:32:29Z"
-  },
-  {
-    "additions": 158,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The previous values are simply wrong. (which also cause problems for tiny model creation)",
-    "changed_files": 51,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44549",
-    "created_at": "2026-03-09T15:44:36Z",
-    "deletions": 126,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44549/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44549",
-    "labels": [],
-    "merged": true,
-    "number": 44549,
+    "number": 44800,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix wrong (non-existing) checkpoints",
-    "updated_at": "2026-03-11T15:16:56Z"
+    "title": "fix: handle dict vocab in CamembertTokenizer for tokenizer.json (#44488)",
+    "updated_at": "2026-03-18T15:37:54Z"
   },
   {
-    "additions": 12,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43935 ## Summary - Added `eval_on_end` argument to `TrainingArguments` (default: `False`) - Added conditional evaluation at the end of training in `Trainer.train()`, symmetric to `eval_on_start` - Implementation mirrors the existing\u2026",
-    "changed_files": 2,
+    "additions": 333,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates the Hardware section of the docs for training: - combined CPU/Distributed CPU into a single doc - add more info to the Gaudi doc (mixed precision, torch.compile, distributed training) - add more info to the MPS doc (mixed precision\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44548",
-    "created_at": "2026-03-09T15:03:27Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44799",
+    "created_at": "2026-03-17T17:19:51Z",
+    "deletions": 627,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44548/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44548",
+    "files_url": "https://github.com/huggingface/transformers/pull/44799/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44799",
     "labels": [],
     "merged": false,
-    "number": 44548,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add eval_on_end flag to Trainer",
-    "updated_at": "2026-03-09T19:31:59Z"
+    "number": 44799,
+    "review_comments_count": 19,
+    "state": "open",
+    "title": "[docs] training on specific hardware",
+    "updated_at": "2026-03-23T09:09:32Z"
   },
   {
-    "additions": 2,
-    "author": "mvanhorn",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44373 ## Summary - Corrected the docstring for `position_ids` parameter in `prepare_fa_kwargs_from_position_ids` and `_prepare_from_posids` which incorrectly described attention mask semantics (\"Boolean or int tensor... 1 means vali\u2026",
+    "additions": 17,
+    "author": "divyanks",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44547",
-    "created_at": "2026-03-09T14:59:27Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44798",
+    "created_at": "2026-03-17T16:51:46Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44547/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44547",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44798/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44798",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44547,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "Fix position_ids docstring in modeling_flash_attention_utils.py",
-    "updated_at": "2026-03-09T16:08:29Z"
+    "number": 44798,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add MPS (Apple Silicon) example and documentation",
+    "updated_at": "2026-03-18T15:37:09Z"
   },
   {
-    "additions": 4,
-    "author": "Abdennacer-Badaoui",
+    "additions": 1,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "The AMD Docker image build (`latest-pytorch-amd`) has been failing since early February due to the 6h GitHub Actions job time limit being exceeded. The root cause is that Flash Attention is cloned and compiled from the latest commit on the\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Depends on #44887 and kernels being version `12.3` Works OOB with little changes! Example script for demonstration: ```python from transformers import AutoModelForCausalLM, AutoTokenizer fa_version = 4 #model_id = \"openai/gpt-oss-20b\" mode\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44546",
-    "created_at": "2026-03-09T14:15:51Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44797",
+    "created_at": "2026-03-17T15:35:59Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44546/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44546",
+    "files_url": "https://github.com/huggingface/transformers/pull/44797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44797",
     "labels": [],
     "merged": true,
-    "number": 44546,
+    "number": 44797,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AMD Docker image build timeout by pinning Flash Attention commit",
-    "updated_at": "2026-03-09T14:37:50Z"
+    "title": "[`FA4`] Add kernels fallback",
+    "updated_at": "2026-03-20T19:03:24Z"
   },
   {
-    "additions": 18,
-    "author": "Cyrilvallez",
+    "additions": 4522,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44336",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR refactors transformers serve so that it is not in a single file. We split it into multiple files with clear responsabilities. There were 2,293 lines initially in the serve.py file. ``` \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44544",
-    "created_at": "2026-03-09T11:38:09Z",
-    "deletions": 18,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44796",
+    "created_at": "2026-03-17T13:04:06Z",
+    "deletions": 3100,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44544/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44544",
+    "files_url": "https://github.com/huggingface/transformers/pull/44796/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44796",
     "labels": [],
     "merged": true,
-    "number": 44544,
-    "review_comments_count": 0,
+    "number": 44796,
+    "review_comments_count": 40,
     "state": "closed",
-    "title": "Fix ansi codes in loading reports when not connected to terminal",
-    "updated_at": "2026-03-09T11:52:16Z"
+    "title": "[refactor] Serving into proper modules",
+    "updated_at": "2026-04-01T16:03:37Z"
   },
   {
-    "additions": 154,
-    "author": "umbilnm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44521 `apply_chat_template` with `return_assistant_tokens_mask=True` returns all-zero masks when multimodal inputs (images/videos) are present. ## Root cause `generation_indices` (character-level positions of\u2026",
-    "changed_files": 2,
+    "additions": 771,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds support for a more generic path, aligned with the rest of the loading! model | PR | main ----|-----|---------- \"gdax/Qwen1.5-MoE-A2.7B_gguf\"| 1min 5s |1min 18s",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44543",
-    "created_at": "2026-03-09T10:47:05Z",
-    "deletions": 21,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44543/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44543",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44794",
+    "created_at": "2026-03-17T11:35:33Z",
+    "deletions": 400,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44794/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44794",
     "labels": [],
     "merged": false,
-    "number": 44543,
+    "number": 44794,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix assistant_masks for multimodal inputs in apply_chat_template",
-    "updated_at": "2026-03-09T10:47:05Z"
+    "title": "Refacto GGUF weight conversion",
+    "updated_at": "2026-03-17T17:03:08Z"
   },
   {
-    "additions": 3,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title",
+    "additions": 40,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44792 - Handles None values in Janus model's image generation mode. The `generate()` method for image generation had several places where it assumed certain config values would always be set, causing failure\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44542",
-    "created_at": "2026-03-09T10:00:29Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44793",
+    "created_at": "2026-03-17T11:29:47Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44542/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44542",
+    "files_url": "https://github.com/huggingface/transformers/pull/44793/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44793",
     "labels": [],
-    "merged": true,
-    "number": 44542,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix backend dependency",
-    "updated_at": "2026-03-09T10:10:24Z"
+    "merged": false,
+    "number": 44793,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "fix(janus): Handle None values in image generation mode",
+    "updated_at": "2026-03-18T10:43:24Z"
   },
   {
-    "additions": 1,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Per the title, an error occurs when `tp_plan` is empty due to [here](https://github.com/huggingface/transformers/blob/701628527ae1ef37473f05f5d94fac7f457a3f8f/src/transformers/core_model_loading.py#L1120): ``` [rank0]: Traceback (most rece\u2026",
+    "additions": 20,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? AMD Strix Halo APUs (gfx1151) have a driver bug where safetensors mmap doesn't release memory properly with the unified memory architecture. This causes OOM errors when loading models that should fit in memory (e.g.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44540",
-    "created_at": "2026-03-09T09:37:36Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44791",
+    "created_at": "2026-03-17T10:33:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44540/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44540",
-    "labels": [],
-    "merged": true,
-    "number": 44540,
+    "files_url": "https://github.com/huggingface/transformers/pull/44791/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44791",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44791,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix UnboundLocalError for tp_plan_alt when tp_plan is empty",
-    "updated_at": "2026-03-11T13:42:39Z"
+    "title": "fix: disable mmap on Strix Halo APUs to avoid OOM",
+    "updated_at": "2026-03-18T12:33:05Z"
   },
   {
-    "additions": 1333,
-    "author": "kmswin1",
-    "author_association": "NONE",
-    "body_excerpt": "A.X K1 \ubaa8\ub378 \ucd94\uac00\ud569\ub2c8\ub2e4. \uae30\uc874 inference \uc640 \ub3d9\uc77c\ud55c \uac83 \ud655\uc778\ud588\uc2b5\ub2c8\ub2e4.",
-    "changed_files": 8,
+    "additions": 72,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch - adds a simple cache to the model linter so we skip files that did not change and were valid - reworks `Makefile` targets",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44539",
-    "created_at": "2026-03-09T09:03:10Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44790",
+    "created_at": "2026-03-17T08:54:47Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44539/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44539",
+    "files_url": "https://github.com/huggingface/transformers/pull/44790/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44790",
     "labels": [],
-    "merged": false,
-    "number": 44539,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44790,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add A.X K1",
-    "updated_at": "2026-03-09T09:04:11Z"
+    "title": "feat: added cache to the model linter",
+    "updated_at": "2026-03-24T15:28:29Z"
   },
   {
-    "additions": 14,
+    "additions": 21,
     "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Follow-up of #44532: we need to change the required status check to the new added job `doc_build_status_check` added in this PR, otherwise the merge queue won't get the required (passing) status and will eventually\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Some configs from the hub have different types.",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44538",
-    "created_at": "2026-03-09T08:59:12Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44789",
+    "created_at": "2026-03-17T08:41:30Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44538/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44538",
+    "files_url": "https://github.com/huggingface/transformers/pull/44789/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44789",
     "labels": [],
     "merged": true,
-    "number": 44538,
-    "review_comments_count": 0,
+    "number": 44789,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Add a new job in `build_pr_documentation.yml` (will be the new required job)",
-    "updated_at": "2026-03-09T09:12:25Z"
+    "title": "Fix config loading issues (type issues)",
+    "updated_at": "2026-03-17T09:44:50Z"
   },
   {
-    "additions": 2,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Per the title. Supplement skip logic for XPU in the CPU-only tp tests. Hi @IlyasMoutawwakil, please help review, thanks!",
+    "additions": 0,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The pipeline() docstring included an example using the 'question-answering' task, but this task is not in SUPPORTED_TASKS and will raise an error when used. Remove this outdated example to avoid confusing users following the documentation.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44536",
-    "created_at": "2026-03-09T08:09:41Z",
-    "deletions": 2,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44788",
+    "created_at": "2026-03-17T08:38:25Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44536/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44536",
+    "files_url": "https://github.com/huggingface/transformers/pull/44788/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44788",
     "labels": [],
-    "merged": true,
-    "number": 44536,
+    "merged": false,
+    "number": 44788,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Supplement skip logic for XPU in the CPU-only tp tests",
-    "updated_at": "2026-03-09T10:10:49Z"
+    "title": "docs(pipelines): remove outdated question-answering example",
+    "updated_at": "2026-03-23T17:19:33Z"
   },
   {
-    "additions": 53,
-    "author": "Anakintano",
+    "additions": 4,
+    "author": "BillionClaw",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Problem `Qwen2_5_VLProcessor.apply_chat_template` raises `ValueError: setting an array element with a sequence` when called with a batch of \u22652 conversations that include images under the default `padding=False` setting. **Root cause:**\u2026",
-    "changed_files": 3,
+    "body_excerpt": "The question-answering pipeline was removed in v5.0.0 per MIGRATION_GUIDE_V5.md, but the non-English task guides still referenced it. This updates the Arabic, Chinese, Japanese, and Korean question answering task guides to remove usage of\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44535",
-    "created_at": "2026-03-09T07:54:58Z",
-    "deletions": 10,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44787",
+    "created_at": "2026-03-17T08:24:09Z",
+    "deletions": 66,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44535/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44535",
+    "files_url": "https://github.com/huggingface/transformers/pull/44787/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44787",
     "labels": [],
-    "merged": false,
-    "number": 44535,
+    "merged": true,
+    "number": 44787,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix crash in Qwen2_5_VLProcessor when using batched input with padding=False",
-    "updated_at": "2026-03-09T12:44:00Z"
+    "state": "closed",
+    "title": "docs(tasks): remove references to removed question-answering pipeline",
+    "updated_at": "2026-03-17T16:23:50Z"
   },
   {
-    "additions": 0,
-    "author": "stargazerwh",
-    "author_association": "NONE",
-    "body_excerpt": "## Description The 'transformers run' command was removed in Transformers v5, but the documentation still contained references in 62 model documentation files. This PR removes all `<hfoption id='transformers CLI'>` and `<hfoption id='trans\u2026",
-    "changed_files": 62,
+    "additions": 25,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "AMD Strix Halo APUs (gfx1151) experience OOM errors when loading large models via safetensors mmap due to unified memory architecture issues. This fix detects Strix Halo GPUs by checking the GPU architecture name (gfx1151) and forces a CPU\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44533",
-    "created_at": "2026-03-09T01:39:32Z",
-    "deletions": 439,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44786",
+    "created_at": "2026-03-17T08:17:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44533/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44533",
+    "files_url": "https://github.com/huggingface/transformers/pull/44786/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44786",
     "labels": [],
     "merged": false,
-    "number": 44533,
+    "number": 44786,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Remove references to removed 'transformers run' CLI command",
-    "updated_at": "2026-03-09T02:33:08Z"
+    "title": "fix(core_model_loading): disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-17T10:29:44Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? So we can use `Require Merge Queue` functionoality",
+    "additions": 307,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "AMD Strix Halo APUs (e.g., Radeon 8060S) have issues with mmap-based tensor loading from safetensors, causing out-of-memory errors even when sufficient memory is available. This fix: - Adds `is_strix_halo()` helper to detect Strix Halo GPU\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44532",
-    "created_at": "2026-03-08T20:34:05Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44785",
+    "created_at": "2026-03-17T06:55:31Z",
+    "deletions": 83,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44532/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44532",
+    "files_url": "https://github.com/huggingface/transformers/pull/44785/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44785",
     "labels": [],
-    "merged": true,
-    "number": 44532,
+    "merged": false,
+    "number": 44785,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update `build_pr_documentation` workflow for `merge_group` event",
-    "updated_at": "2026-03-08T20:42:57Z"
+    "title": "fix(model_loading): Disable mmap on Strix Halo to avoid OOM",
+    "updated_at": "2026-03-17T10:28:06Z"
   },
   {
-    "additions": 16,
-    "author": "s-zx",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `Qwen2_5_VLProcessor.apply_chat_template` crashing with a `ValueError` when called with a batch of conversations with different prompt lengths and `padding=False` (the default). ### Root cause In the `mm_toke\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR fixes the DeepSeek tokenizer issue where spaces were lost during decoding in Transformers v5. ## Problem DeepSeek V2 and V3 models use SentencePiece tokenization (like Llama) but were falling back to the generic TokenizersBackend i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44531",
-    "created_at": "2026-03-08T19:38:00Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44783",
+    "created_at": "2026-03-17T05:58:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44531/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44531",
+    "files_url": "https://github.com/huggingface/transformers/pull/44783/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44783",
     "labels": [],
     "merged": false,
-    "number": 44531,
+    "number": 44783,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Qwen2_5_VLProcessor.apply_chat_template crash on unpadded batched input",
-    "updated_at": "2026-03-09T13:14:02Z"
+    "state": "open",
+    "title": "fix(auto): Map deepseek_v2 and deepseek_v3 to LlamaTokenizer",
+    "updated_at": "2026-03-17T11:12:52Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh2",
+    "additions": 6,
+    "author": "JiwaniZakir",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
+    "body_excerpt": "Fixes #44737 `XLNetModel.relative_positional_encoding` was creating all `torch.arange` tensors on CPU by default, then calling `.to(output_h.device)` at the call site to move them. Adds a `device` parameter to `relative_positional_encoding\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44529",
-    "created_at": "2026-03-08T18:12:54Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44782",
+    "created_at": "2026-03-17T05:11:36Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44529/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44529",
+    "files_url": "https://github.com/huggingface/transformers/pull/44782/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44782",
     "labels": [],
     "merged": true,
-    "number": 44529,
+    "number": 44782,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "test merge queue 1",
-    "updated_at": "2026-03-09T21:01:09Z"
+    "title": "fix: XLNet: relative_positional_encoding computes on CPU every forward",
+    "updated_at": "2026-03-19T13:30:48Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
-    "changed_files": 1,
+    "additions": 5,
+    "author": "bensons",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Some model repos provide `extra_special_tokens` as a list in their tokenizer_config.json, which caused an `AttributeError: 'list' object has no attribute 'keys'`. This converts list inputs to a dict mapping each tok\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44528",
-    "created_at": "2026-03-08T17:54:32Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44528/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44528",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44781",
+    "created_at": "2026-03-17T04:59:02Z",
+    "deletions": 2849,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44781/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44781",
     "labels": [],
     "merged": false,
-    "number": 44528,
+    "number": 44781,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "test merge queue 1",
-    "updated_at": "2026-03-09T20:39:15Z"
+    "state": "open",
+    "title": "Fix `_set_model_specific_special_tokens` to accept list-format `extra_special_tokens`",
+    "updated_at": "2026-03-27T23:19:21Z"
   },
   {
-    "additions": 3,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes these failing [MusicgenStereoIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500451700#step:14:7870) ## Before submitting - [ ] This PR fixes a typo or improves the\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44527",
-    "created_at": "2026-03-08T14:26:02Z",
-    "deletions": 1,
+    "additions": 145,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed logic error in is_tiktoken_available function. The original code `return with_blobfile and _is_package_available(\"blobfile\")[0] or True` would always return True due to operator precedence.",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44778",
+    "created_at": "2026-03-16T23:41:29Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44527/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44527",
-    "labels": [],
-    "merged": true,
-    "number": 44527,
+    "files_url": "https://github.com/huggingface/transformers/pull/44778/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44778",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44778,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `MusicgenStereo` integration tests",
-    "updated_at": "2026-03-10T12:28:39Z"
+    "title": "fix: correct logic error in is_tiktoken_available function",
+    "updated_at": "2026-03-18T13:15:37Z"
   },
   {
-    "additions": 90,
-    "author": "JoursBleu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Add GGUF loading support for MiniMax-M2.1 (456B MoE) model. MiniMax-M2.1 is a large Mixture-of-Experts model with 456B total parameters (45.9B active), 256 experts and 8 experts per token. This PR enables loading it\u2026",
-    "changed_files": 2,
+    "additions": 35,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds docs for #43705 (enable bidirectional attention for decoder-only models)",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44526",
-    "created_at": "2026-03-08T09:57:38Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44777",
+    "created_at": "2026-03-16T21:58:40Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44526/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44526",
+    "files_url": "https://github.com/huggingface/transformers/pull/44777/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44777",
     "labels": [],
     "merged": true,
-    "number": 44526,
+    "number": 44777,
     "review_comments_count": 1,
     "state": "closed",
-    "title": "Add GGUF support for MiniMax-M2.1 model",
-    "updated_at": "2026-03-18T14:39:20Z"
+    "title": "[docs] is_causal feature",
+    "updated_at": "2026-03-17T19:50:43Z"
   },
   {
-    "additions": 1,
-    "author": "jnMetaCode",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes a `KeyError` crash in `_parse_type_hint` in `chat_template_utils.py` (line 117). When processing Union types, the code accesses `subtype[\"type\"]` without checking the key exists. `_get_json_schema_type(Any)` returns `{}` (\u2026",
+    "additions": 0,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "the doc-builder is breaking because it can't find `Mistral4ForQuestionAnswering`, which looks like it doesn't exist",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44525",
-    "created_at": "2026-03-08T09:21:27Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44776",
+    "created_at": "2026-03-16T20:43:33Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44525/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44525",
+    "files_url": "https://github.com/huggingface/transformers/pull/44776/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44776",
     "labels": [],
     "merged": true,
-    "number": 44525,
+    "number": 44776,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix KeyError in _parse_type_hint when Union contains Any",
-    "updated_at": "2026-03-09T13:43:23Z"
+    "title": "[fix] mistral 4 docs",
+    "updated_at": "2026-03-16T21:11:29Z"
   },
   {
-    "additions": 1,
-    "author": "jnMetaCode",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes a bug in `AssistantTracker.is_active()` in `chat_template_utils.py`. After activation via `activate_tracker()`, `_rendered_blocks` and `_generation_indices` are set to list arguments which may be empty `[]`. The `is_active\u2026",
-    "changed_files": 1,
+    "additions": 177,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "refactors the current [Parallelism methods](https://huggingface.co/docs/transformers/main/en/perf_train_gpu_many#zero-data-parallelism-pipeline-parallelism-and-model-parallelism-3d-parallelism) doc to: - focus on practical examples of comb\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44524",
-    "created_at": "2026-03-08T09:21:25Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44775",
+    "created_at": "2026-03-16T20:23:29Z",
+    "deletions": 109,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44775/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44775",
+    "labels": [],
+    "merged": false,
+    "number": 44775,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[docs] n-d parallelism",
+    "updated_at": "2026-03-16T20:28:48Z"
+  },
+  {
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Remove `is_causal` from `EuroBertConfig`",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44774",
+    "created_at": "2026-03-16T18:56:19Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44524/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44524",
+    "files_url": "https://github.com/huggingface/transformers/pull/44774/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44774",
     "labels": [],
     "merged": true,
-    "number": 44524,
+    "number": 44774,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AssistantTracker.is_active() returning False after activation with empty lists",
-    "updated_at": "2026-03-09T13:36:19Z"
+    "title": "Remove `is_causal` from `EuroBertConfig`",
+    "updated_at": "2026-03-17T09:33:21Z"
+  },
+  {
+    "additions": 3,
+    "author": "githubnemo",
+    "author_association": "MEMBER",
+    "body_excerpt": "The links to the quantization offloading were outdated and 4-bit quantization also supports offloading which should be mentioned. cc @SunMarc",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44772",
+    "created_at": "2026-03-16T18:46:13Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44772/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44772",
+    "labels": [],
+    "merged": false,
+    "number": 44772,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "bitsandbytes: Update links and docs",
+    "updated_at": "2026-03-17T15:57:56Z"
   },
   {
     "additions": 2,
-    "author": "jnMetaCode",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes two small bugs in `load_sharded_checkpoint` in `trainer_utils.py`: **Bug 1 \u2014 Copy-paste error in error message (line 1108):** When reporting unexpected keys, the error message incorrectly says \"Missing key(s)\" instead of \"\u2026",
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? wtf",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44523",
-    "created_at": "2026-03-08T09:21:22Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44771",
+    "created_at": "2026-03-16T18:45:11Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44771/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44771",
+    "labels": [],
+    "merged": false,
+    "number": 44771,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "wtf",
+    "updated_at": "2026-03-16T18:56:00Z"
+  },
+  {
+    "additions": 203,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix tests failing because of `strict` type validation and decorate two missing configs, Nemotron and VibeVoice",
+    "changed_files": 12,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44770",
+    "created_at": "2026-03-16T18:44:03Z",
+    "deletions": 268,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44523/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44523",
+    "files_url": "https://github.com/huggingface/transformers/pull/44770/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44770",
     "labels": [],
     "merged": true,
-    "number": 44523,
-    "review_comments_count": 0,
+    "number": 44770,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix error message label and docstring default in load_sharded_checkpoint",
-    "updated_at": "2026-03-10T15:48:41Z"
+    "title": "Fix configs with `@strict`",
+    "updated_at": "2026-03-17T15:39:43Z"
   },
   {
-    "additions": 41,
-    "author": "nakigami",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR introduces initial unit test coverage for the `transformers-cli` tool, specifically focusing on diagnostic and model utility commands. Currently, these CLI entry points lack automated tests. These new tests\u2026",
-    "changed_files": 1,
+    "additions": 145,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary The `is_batched_video()` and `convert_pil_frames_to_video()` functions in `src/transformers/video_utils.py` were accessing `videos[0]` without first checking if the list is empty, causing `IndexError` when empty lists are passed\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44520",
-    "created_at": "2026-03-08T01:30:39Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44769",
+    "created_at": "2026-03-16T18:40:07Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44520/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44520",
+    "files_url": "https://github.com/huggingface/transformers/pull/44769/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44769",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44520,
+    "number": 44769,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "test(cli): add unit tests for env and model utility commands",
-    "updated_at": "2026-03-09T13:19:15Z"
+    "title": "Fix: Handle empty lists in video_utils functions",
+    "updated_at": "2026-03-18T13:15:55Z"
   },
   {
-    "additions": 3,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes these failing [MarianIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500458014#step:14:6186) <img width=\"2378\" height=\"657\" alt=\"image\" src=\"https://github.com/user\u2026",
+    "additions": 20,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "The function `add_tensor_parallel_hooks_to_module` has unused parameters, in this PR we: - Remove `tp_plan`, which is not used. - Remove `parameter_name` which is not used - Remove `layer_name`. This parameter is only used for logging purp\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44519",
-    "created_at": "2026-03-07T19:53:23Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44519/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44519",
-    "labels": [],
-    "merged": true,
-    "number": 44519,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix failing `MarianIntegrationTests`",
-    "updated_at": "2026-03-09T14:11:12Z"
-  },
-  {
-    "additions": 12,
-    "author": "KartikPawade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44514 `Qwen2_5_VLProcessor.__call__` crashed with a `ValueError` when processing a batch of conversations with different lengths and `padding=False` (the default). **Root cause:** The `mm_token_type_ids` blo\u2026",
-    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44518",
-    "created_at": "2026-03-07T19:22:40Z",
-    "deletions": 10,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44768",
+    "created_at": "2026-03-16T18:29:52Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44518/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44518",
+    "files_url": "https://github.com/huggingface/transformers/pull/44768/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44768",
     "labels": [],
     "merged": false,
-    "number": 44518,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Qwen2_5_VLProcessor crashes on batched input when padding=False \u2026",
-    "updated_at": "2026-03-10T18:57:10Z"
+    "number": 44768,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Remove unused parameters and improve add_tensor_parallel_hooks_t\u2026",
+    "updated_at": "2026-03-24T19:23:13Z"
   },
   {
-    "additions": 12637,
-    "author": "ShahVandit",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds Qwen3-TTS, a series of text-to-speech models by the Qwen team (Alibaba Group), to Transformers. **Architecture:** - `Qwen3TTSForConditionalGeneration` \u2014 text to multi-codebook speech codes (talker) - `Qwen3TTS\u2026",
-    "changed_files": 16,
+    "additions": 11,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? EuroBertConfig was missing `@strict(accept_kwargs=True)` unlike its parent LlamaConfig, causing failures when reloading saved configs that include extra keys like `architectures`. Also fixed the test helper passing\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44517",
-    "created_at": "2026-03-07T18:48:04Z",
-    "deletions": 24,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44767",
+    "created_at": "2026-03-16T17:31:26Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44517/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44517",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": false,
-    "number": 44517,
-    "review_comments_count": 18,
-    "state": "open",
-    "title": "Add qwen3 tts",
-    "updated_at": "2026-03-24T20:18:59Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44767/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44767",
+    "labels": [],
+    "merged": true,
+    "number": 44767,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "Fix: Eurobert model was missing @strict decorator and invalid test kwargs",
+    "updated_at": "2026-03-16T19:02:31Z"
   },
   {
-    "additions": 65,
-    "author": "JasonCZMeng",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fix `Qwen2_5_VLProcessor.apply_chat_template` crashing with `ValueError` when called with batched inputs of different sequence lengths (ragged lists) and `padding=False` (the default). Fixes #44514 ## Root Cause The `mm_token_ty\u2026",
-    "changed_files": 3,
+    "additions": 26,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "for when remote code tries to import from `tokenization_xxx_fast`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44516",
-    "created_at": "2026-03-07T18:33:40Z",
-    "deletions": 10,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44766",
+    "created_at": "2026-03-16T17:30:23Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44516/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44516",
+    "files_url": "https://github.com/huggingface/transformers/pull/44766/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44766",
     "labels": [],
-    "merged": false,
-    "number": 44516,
+    "merged": true,
+    "number": 44766,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(qwen2_5_vl): handle ragged batched input in apply_chat_template",
-    "updated_at": "2026-03-09T13:14:22Z"
+    "title": "support xxxFast alias in v5 tokenizers",
+    "updated_at": "2026-03-18T13:40:05Z"
   },
   {
-    "additions": 1,
-    "author": "Sai-Suraj-27",
+    "additions": 19,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [GPTNeoModelLanguageGenerationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500455894#step:14:1483) <img width=\"2363\" height=\"239\" alt=\"image\" src=\"https://githu\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **PaliGemma 2:** The [PaliGemma 1 test class](https://github.com/huggingface/transformers/blob/main/tests/models/paligemma/test_modeling_paligemm\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44515",
-    "created_at": "2026-03-07T18:16:35Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44765",
+    "created_at": "2026-03-16T17:26:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44515/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44515",
+    "files_url": "https://github.com/huggingface/transformers/pull/44765/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44765",
     "labels": [],
     "merged": true,
-    "number": 44515,
+    "number": 44765,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `GPTNeoModelLanguageGenerationTest`",
-    "updated_at": "2026-03-09T14:11:21Z"
+    "title": "fix(testing): Fix PaliGemma 2 and PaddleOCR-VL test failures on main",
+    "updated_at": "2026-03-20T13:55:55Z"
   },
   {
-    "additions": 1,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 69,
+    "additions": 12,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes the siglip import. that was also crashing the test fetcher",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44513",
-    "created_at": "2026-03-07T16:11:55Z",
-    "deletions": 492,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44764",
+    "created_at": "2026-03-16T17:15:40Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44513/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44513",
+    "files_url": "https://github.com/huggingface/transformers/pull/44764/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44764",
     "labels": [],
     "merged": true,
-    "number": 44513,
-    "review_comments_count": 0,
+    "number": 44764,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix: Remove references to transformers run command",
-    "updated_at": "2026-03-09T15:37:16Z"
+    "title": "fix: sig lip import",
+    "updated_at": "2026-03-16T17:38:41Z"
+  },
+  {
+    "additions": 17,
+    "author": "xenova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Adds support for MLP mixers, used by [nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16](https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16). Previously, it would crash because it would not recognize the `-` char in t\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44763",
+    "created_at": "2026-03-16T17:04:36Z",
+    "deletions": 5,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44763/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44763",
+    "labels": [],
+    "merged": false,
+    "number": 44763,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[nemotron_h] Add support for MLP mixers",
+    "updated_at": "2026-03-29T20:54:39Z"
   },
   {
     "additions": 4,
-    "author": "04cb",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44355. The inspect.getsource() call raises TypeError when running compiled Python files with Cython-compiled functions. Added try-except block to gracefully handle this case by returning a default indentation level of 4.",
+    "author": "BillionClaw",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "XLNet.relative_positional_encoding creates intermediate tensors on CPU every forward pass because torch.arange was missing the device parameter. This causes unnecessary CPU-GPU transfers when running on CUDA. Added device=self.device to al\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44511",
-    "created_at": "2026-03-07T05:36:25Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44762",
+    "created_at": "2026-03-16T16:17:54Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44511/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44511",
+    "files_url": "https://github.com/huggingface/transformers/pull/44762/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44762",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44511,
+    "number": 44762,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix get_docstring_indentation_level to handle compiled functions",
-    "updated_at": "2026-03-09T13:10:06Z"
+    "title": "fix: Cache XLNet relative_positional_encoding to avoid CPU computation",
+    "updated_at": "2026-03-18T15:16:14Z"
   },
   {
-    "additions": 3,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 27,
+    "additions": 152,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This adds rule 10: ``` Direct config definitions must use @strict(accept_kwargs=True). ```",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44510",
-    "created_at": "2026-03-06T23:37:51Z",
-    "deletions": 358,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44761",
+    "created_at": "2026-03-16T16:05:03Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44510/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44510",
+    "files_url": "https://github.com/huggingface/transformers/pull/44761/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44761",
     "labels": [],
     "merged": true,
-    "number": 44510,
-    "review_comments_count": 1,
+    "number": 44761,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "Fix: Remove references to `text2text-generation`, `summarization` and `translation` pipeline tasks",
-    "updated_at": "2026-03-10T00:39:30Z"
+    "title": "model-linter: Added rule 10",
+    "updated_at": "2026-03-17T08:52:19Z"
   },
   {
-    "additions": 8,
-    "author": "KartikPawade",
+    "additions": 2090,
+    "author": "juliendenize",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Older OwlViT checkpoints stored `position_ids` as buffers in the text and vision embedding modules. These tensors are simple integer ranges (0 \u2192 max sequence length) and are now recomputed dynamically during initial\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44508",
-    "created_at": "2026-03-06T18:49:59Z",
-    "deletions": 0,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44760",
+    "created_at": "2026-03-16T15:54:11Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44508/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44508",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44760/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44760",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 44508,
-    "review_comments_count": 0,
+    "number": 44760,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "Fix unexpected `position_ids` keys when loading OwlViT models",
-    "updated_at": "2026-03-18T18:30:48Z"
+    "title": "Add Mistral 4",
+    "updated_at": "2026-03-20T10:44:48Z"
   },
   {
-    "additions": 4,
-    "author": "0xDELUXA",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `torch.distributed.fsdp` is not available in all PyTorch builds (for example, Windows ROCm). Importing it unconditionally at the top level causes an immediate crash with: ``` ModuleNotFoundError: No module named 'to\u2026",
-    "changed_files": 1,
+    "additions": 419,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Follow-up of many related PR, last one in time being https://github.com/huggingface/transformers/pull/44602. This PR completes all the models that may need non-trivial treatment. Only about 30-40 models still have m\u2026",
+    "changed_files": 42,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44507",
-    "created_at": "2026-03-06T18:03:49Z",
-    "deletions": 2,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44759",
+    "created_at": "2026-03-16T15:38:13Z",
+    "deletions": 983,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44507/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44507",
+    "files_url": "https://github.com/huggingface/transformers/pull/44759/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44759",
     "labels": [],
     "merged": true,
-    "number": 44507,
-    "review_comments_count": 0,
+    "number": 44759,
+    "review_comments_count": 26,
     "state": "closed",
-    "title": "Fix: Conditionally import `torch.distributed.fsdp` in `trainer_seq2seq.py`",
-    "updated_at": "2026-03-13T10:17:56Z"
+    "title": "Remove cache_position in more models (3)",
+    "updated_at": "2026-03-18T13:09:37Z"
   },
   {
-    "additions": 1,
-    "author": "michaelbenayoun",
+    "additions": 825,
+    "author": "LysandreJik",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The current implementation does not work with the `mps` device and TP. ## Example script script.py ``` import os os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\" import torch from transformers import AutoModelForCaus\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Provides a nicer feedback when `transformers chat` loads a model, instead of hanging https://github.com/user-attachments/assets/8f68f914-b702-4430-b97f-e8cc25326b70 <p>Adds a <code>POST /load_model</code> endpoint to <code>transformers ser\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44506",
-    "created_at": "2026-03-06T18:03:33Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44758",
+    "created_at": "2026-03-16T15:02:15Z",
+    "deletions": 63,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44506/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44506",
+    "files_url": "https://github.com/huggingface/transformers/pull/44758/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44758",
     "labels": [],
     "merged": true,
-    "number": 44506,
-    "review_comments_count": 0,
+    "number": 44758,
+    "review_comments_count": 20,
     "state": "closed",
-    "title": "Tensor Parallelism and `mps` device",
-    "updated_at": "2026-03-11T15:16:49Z"
+    "title": "Propagate the model loading from transformers serve to chat",
+    "updated_at": "2026-03-19T17:20:03Z"
   },
   {
-    "additions": 16,
-    "author": "kushalkkb",
-    "author_association": "NONE",
-    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
+    "additions": 1,
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - `_valid_auto_compile_criteria()` gates auto-compilation on `device.type in [\"cuda\", \"xpu\"]`, excluding Neuron devices. This means `torch.compile` never triggers automatically on Neuron even when `StaticCache` is used (which se\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44505",
-    "created_at": "2026-03-06T17:47:37Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44757",
+    "created_at": "2026-03-16T14:54:38Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44505/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44505",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44757/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44757",
+    "labels": [],
     "merged": false,
-    "number": 44505,
+    "number": 44757,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Improve error handling in load_vocab for invalid vocabulary path",
-    "updated_at": "2026-03-10T04:14:31Z"
+    "state": "open",
+    "title": "Add Neuron to auto-compile hardware list",
+    "updated_at": "2026-03-16T15:05:00Z"
   },
   {
-    "additions": 13,
-    "author": "kushalkkb",
-    "author_association": "NONE",
-    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44504",
-    "created_at": "2026-03-06T17:24:10Z",
-    "deletions": 0,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44755",
+    "created_at": "2026-03-16T14:08:34Z",
+    "deletions": 148,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44504/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44504",
+    "files_url": "https://github.com/huggingface/transformers/pull/44755/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44755",
     "labels": [],
     "merged": false,
-    "number": 44504,
+    "number": 44755,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve error handling in load_vocab for invalid vocabulary path",
-    "updated_at": "2026-03-06T17:46:17Z"
+    "title": "Dont merge, testing smth",
+    "updated_at": "2026-03-18T10:09:15Z"
   },
   {
-    "additions": 8,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Moonshine:** In [MoonshineEncoder.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/moonshine/modular_moon\u2026",
-    "changed_files": 4,
+    "additions": 20,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44737 - Added `device=self.device` to all four `torch.arange()` calls in `XLNetModel.relative_positional_encoding()` so that intermediate tensors are created directly on the model's device instead of always on CPU. - With\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44503",
-    "created_at": "2026-03-06T17:08:00Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44753",
+    "created_at": "2026-03-16T14:01:08Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44503/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44503",
+    "files_url": "https://github.com/huggingface/transformers/pull/44753/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44753",
     "labels": [],
-    "merged": true,
-    "number": 44503,
+    "merged": false,
+    "number": 44753,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Fix MoonshineEncoder UnboundLocalError and Florence2VisionBackbone dtype mismatch",
-    "updated_at": "2026-03-09T18:06:17Z"
+    "title": "fix: add device= to torch.arange in XLNet relative_positional_encoding",
+    "updated_at": "2026-03-18T13:28:40Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Introduced in https://github.com/huggingface/transformers/pull/44381, not sure why the CI passed",
-    "changed_files": 1,
+    "additions": 100,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR fix? The `flatten()` function in `tokenization_utils_base.py` had a bug where it was checking `arr[0]` instead of `sub_arr` when determining if an element should be recursively flattened. ### Bug Details - **File**: `s\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44502",
-    "created_at": "2026-03-06T17:03:17Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44751",
+    "created_at": "2026-03-16T13:40:44Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44502/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44502",
-    "labels": [],
-    "merged": true,
-    "number": 44502,
+    "files_url": "https://github.com/huggingface/transformers/pull/44751/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44751",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44751,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix type checker",
-    "updated_at": "2026-03-06T17:09:37Z"
+    "title": "Fix: Correct variable reference in flatten() function",
+    "updated_at": "2026-03-18T13:16:12Z"
   },
   {
     "additions": 1,
-    "author": "frogNotToad",
+    "author": "juliendenize",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Changes the word \"maximize\" to \"minimize\" in the docs Fixes #44492 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). ## Who can review? Anyon\u2026",
+    "body_excerpt": "# What does this PR do? This PR adds `apply_yarn_scaling` as an optional key for yarn repo. This was requested as part of a vLLM PR https://github.com/vllm-project/vllm/pull/37104 that seeks to silence some rope issues when converting Mist\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44501",
-    "created_at": "2026-03-06T16:58:14Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44747",
+    "created_at": "2026-03-16T10:32:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44501/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44501",
+    "files_url": "https://github.com/huggingface/transformers/pull/44747/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44747",
     "labels": [],
-    "merged": true,
-    "number": 44501,
+    "merged": false,
+    "number": 44747,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fixed typo in docs/source/en/kv_cache.md",
-    "updated_at": "2026-03-06T20:05:36Z"
+    "title": "Add apply_yarn_scaling as optional key to yarn",
+    "updated_at": "2026-03-16T12:48:08Z"
   },
   {
-    "additions": 18,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - Do proper type check in case jax is installed. - Make sure older torch versions don't raise typing issues",
-    "changed_files": 4,
+    "additions": 202,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed Issue #44737: XLNet relative_positional_encoding function missing device parameter in torch.arange calls.",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44500",
-    "created_at": "2026-03-06T16:56:12Z",
-    "deletions": 16,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44745",
+    "created_at": "2026-03-16T09:39:30Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44500/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44500",
+    "files_url": "https://github.com/huggingface/transformers/pull/44745/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44745",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44745,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: add device parameter to torch.arange calls in XLNet",
+    "updated_at": "2026-03-18T13:16:43Z"
+  },
+  {
+    "additions": 35,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44739",
+    "created_at": "2026-03-16T07:15:33Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44739/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44739",
     "labels": [],
     "merged": true,
-    "number": 44500,
-    "review_comments_count": 0,
+    "number": 44739,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Follow-up typing checking fixes",
-    "updated_at": "2026-03-09T10:47:31Z"
+    "title": "fix series of failed test case for janus model",
+    "updated_at": "2026-04-01T08:24:26Z"
   },
   {
-    "additions": 11,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR is just a small cleanup. The `TensorParallelLayer` class defines `_prepare_input_fn` and `_prepare_output_fn` as static methods. But then these methods end-up being instance or static methods in the sub clas\u2026",
+    "additions": 6,
+    "author": "yunhaoli24",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44031 ## The Problem The condition for calling `_patch_mistral_regex` was too broad (`vocab_size > 100000`), causing non-Mistral models like Qwen, LLaMA, BGE-Reranker to show incorrect regex pattern warnings\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44499",
-    "created_at": "2026-03-06T16:46:18Z",
-    "deletions": 18,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44736",
+    "created_at": "2026-03-16T06:00:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44499/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44499",
-    "labels": [],
-    "merged": true,
-    "number": 44499,
+    "files_url": "https://github.com/huggingface/transformers/pull/44736/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44736",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44736,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Make `_prepare_input_fn` and `_prepare_output_fn` instance methods",
-    "updated_at": "2026-03-10T13:53:18Z"
+    "title": "fix: resolve false-positive regex warning for non-mistral models",
+    "updated_at": "2026-03-18T15:08:59Z"
   },
   {
     "additions": 1,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add the `neuron` backend for initialization in TP.",
+    "author": "mango766",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `transformers serve` when the `/v1/responses` streaming endpoint attempts to reuse a KV cache from a previous request in the same conversation session. ### The bug In `generate_response`, `inputs`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44498",
-    "created_at": "2026-03-06T16:23:18Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44735",
+    "created_at": "2026-03-16T04:09:32Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44498/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44498",
+    "files_url": "https://github.com/huggingface/transformers/pull/44735/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44735",
     "labels": [],
     "merged": true,
-    "number": 44498,
+    "number": 44735,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: add neuron in tensor parallelism initialization",
-    "updated_at": "2026-03-12T18:07:52Z"
+    "title": "Fix tensor indexing crash in serve generate_response KV cache continuation",
+    "updated_at": "2026-03-16T15:27:59Z"
   },
   {
-    "additions": 43,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44466 and avoid issues with torch `.bin` checkpoints which always contain both keys!",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44497",
-    "created_at": "2026-03-06T16:21:14Z",
-    "deletions": 14,
+    "additions": 28,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil, pls help review, thx!",
+    "changed_files": 3,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44733",
+    "created_at": "2026-03-16T02:55:54Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44497/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44497",
+    "files_url": "https://github.com/huggingface/transformers/pull/44733/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44733",
     "labels": [],
     "merged": true,
-    "number": 44497,
+    "number": 44733,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[tie weights] \ud83d\udea8 If both weights are present with same weights, still tie them",
-    "updated_at": "2026-03-09T15:00:25Z"
+    "title": "Fix bug and add XPU Expectations for qwen2 and jamba tests",
+    "updated_at": "2026-04-01T08:24:40Z"
   },
   {
-    "additions": 69,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, WIP",
-    "changed_files": 354,
+    "additions": 1,
+    "author": "Defalt-Meh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? While running SmolVLM tests I noticed this warning in the output: ``` tests/test_video_processing_common.py:57: UserWarning: Creating a tensor from a list of numpy.ndarrays is extremely slow. Please consider convert\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44495",
-    "created_at": "2026-03-06T13:57:04Z",
-    "deletions": 521,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44495/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44495",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44731",
+    "created_at": "2026-03-15T23:26:31Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44731/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44731",
     "labels": [],
     "merged": false,
-    "number": 44495,
+    "number": 44731,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[`Gradient Ckpting`] Remove unnecessary attribute definitions",
-    "updated_at": "2026-03-06T13:58:22Z"
+    "title": "[Tests] Fix slow video tensor creation from list of numpy arrays in SmolVLM",
+    "updated_at": "2026-03-15T23:26:31Z"
   },
   {
-    "additions": 13,
-    "author": "tarekziade",
+    "additions": 9,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "- updates `ty` to `0.2.0` - pinned regex package (older versions did not have typing stubs) - fixed a couple of typing failures that went through via other parallel branches",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? It's unclear why the config class has `model_type = \"mlcd_vision_model\"` but the model on the hub has \"model_type\": \"mlcd\". This leads to the following failures (load from hub --> save locally --> local locally) ```\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44494",
-    "created_at": "2026-03-06T12:57:25Z",
-    "deletions": 11,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44730",
+    "created_at": "2026-03-15T20:44:32Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44494/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44494",
+    "files_url": "https://github.com/huggingface/transformers/pull/44730/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44730",
     "labels": [],
     "merged": true,
-    "number": 44494,
-    "review_comments_count": 3,
+    "number": 44730,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Update `ty` to 0.0.20",
-    "updated_at": "2026-03-06T13:30:25Z"
+    "title": "Fix `mlcd` auto config/model/mapping issues",
+    "updated_at": "2026-03-16T12:12:30Z"
   },
   {
-    "additions": 439,
-    "author": "SunMarc",
+    "additions": 214,
+    "author": "xenova",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Since I removed some folders (fsdp, deepspeed) related to training, I need to modify the workflows !",
-    "changed_files": 18,
+    "body_excerpt": "# What does this PR do? This PR introduces a helper utility function, `int_div_ceil`, which performs `math.ceil(a / b)` for non-negative integer operands. This is necessary as the current approach is both error-prone and imprecise (especia\u2026",
+    "changed_files": 58,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44491",
-    "created_at": "2026-03-06T11:15:42Z",
-    "deletions": 647,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44729",
+    "created_at": "2026-03-15T20:29:38Z",
+    "deletions": 225,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44491/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44491",
+    "files_url": "https://github.com/huggingface/transformers/pull/44729/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44729",
     "labels": [],
-    "merged": true,
-    "number": 44491,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix training ci and clean some tests",
-    "updated_at": "2026-03-11T16:27:57Z"
+    "merged": false,
+    "number": 44729,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Avoid floating point math for ceil operations",
+    "updated_at": "2026-03-15T20:49:34Z"
   },
   {
-    "additions": 4,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review, thx! This PR fixes failed test case: `pytest -rA tests/models/eurobert/test_modeling_eurobert.py::EuroBertModelTest::test_model_parallelism`",
+    "additions": 88,
+    "author": "ajmeese7",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes a GPU memory leak in `Bnb4bitQuantize.convert()` where float16 source tensors are never freed during 4-bit quantized model loading via `from_pretrained`, causing OOM on models whose float16 size exceeds GPU VR\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44490",
-    "created_at": "2026-03-06T10:56:48Z",
-    "deletions": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44728",
+    "created_at": "2026-03-15T19:56:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44490/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44490",
+    "files_url": "https://github.com/huggingface/transformers/pull/44728/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44728",
     "labels": [],
-    "merged": true,
-    "number": 44490,
+    "merged": false,
+    "number": 44728,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix model parallelism bug for eurobert model",
-    "updated_at": "2026-03-06T14:16:41Z"
+    "title": "Fix float16 memory leak during 4-bit quantized model loading",
+    "updated_at": "2026-03-16T20:53:54Z"
   },
   {
-    "additions": 310,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR makes `.ai` the single source of truth for agent templates and skills, and adds explicit `Makefile` targets to generate `Codex` and `Claude Code` specific artifacts. It contains a first skill aimed at properly dealing with typing e\u2026",
-    "changed_files": 7,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44489",
-    "created_at": "2026-03-06T08:42:12Z",
-    "deletions": 62,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44489/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44489",
-    "labels": [],
-    "merged": true,
-    "number": 44489,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Centralize AI agent templates in `.ai`",
-    "updated_at": "2026-03-18T14:17:22Z"
-  },
-  {
-    "additions": 482,
-    "author": "abhijeet-dhumal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44486 Adds `KubeflowCallback` to enable automatic progress and metrics reporting for training jobs running on [Kubeflow Trainer](https://github.com/kubeflow/trainer). When training runs inside a Kubeflow Trai\u2026",
-    "changed_files": 6,
+    "additions": 202,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed issue where kwargs like force_download, proxies, token were not being passed to cached_file function.",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44487",
-    "created_at": "2026-03-06T08:31:30Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44727",
+    "created_at": "2026-03-15T19:41:24Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44487/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44487",
-    "labels": [],
-    "merged": true,
-    "number": 44487,
-    "review_comments_count": 8,
+    "files_url": "https://github.com/huggingface/transformers/pull/44727/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44727",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44727,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "feat(integration): Add KubeflowCallback to enable automatic progress \u2026",
-    "updated_at": "2026-03-18T14:58:23Z"
+    "title": "fix: AutoProcessor.from_pretrained not passing kwargs to cached_file",
+    "updated_at": "2026-03-18T13:15:46Z"
   },
   {
-    "additions": 691,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
-    "changed_files": 1,
+    "additions": 198,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Replaced bare except clause with except Exception in _safe_convert_tensor function to follow Python best practices (PEP 8).",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44482",
-    "created_at": "2026-03-06T02:39:41Z",
-    "deletions": 332,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44725",
+    "created_at": "2026-03-15T17:41:18Z",
+    "deletions": 29,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44482/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44482",
-    "labels": [],
-    "merged": true,
-    "number": 44482,
+    "files_url": "https://github.com/huggingface/transformers/pull/44725/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44725",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44725,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add XPU Expectations for higgs_audio_v2 tests",
-    "updated_at": "2026-03-10T08:38:56Z"
+    "title": "fix: replace bare except with Exception in Fuyu image processing",
+    "updated_at": "2026-03-18T13:16:22Z"
   },
   {
-    "additions": 2353,
-    "author": "XingyuHu109",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR adds native Transformers support for DeepSeek-V3.2. It introduces a new `deepseek_v32` model family so the official checkpoints resolve through the standard auto classes without `trust_remote_code`. The implementation ke\u2026",
-    "changed_files": 19,
+    "additions": 6,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? TO be explained.",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44481",
-    "created_at": "2026-03-05T21:14:38Z",
-    "deletions": 30,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44481/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44481",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44724",
+    "created_at": "2026-03-15T17:14:12Z",
+    "deletions": 5,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44724/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44724",
     "labels": [],
     "merged": false,
-    "number": 44481,
-    "review_comments_count": 4,
+    "number": 44724,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Add native DeepSeek-V3.2 support",
-    "updated_at": "2026-03-12T16:02:46Z"
+    "title": "Fix some missing / incorrect entries in auto files",
+    "updated_at": "2026-03-16T09:59:56Z"
   },
   {
-    "additions": 3,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? add `diffusers` to docker file for `VibeVoice` (added in PR #40546).",
-    "changed_files": 1,
+    "additions": 12,
+    "author": "aashirpersonal",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary This PR fixes #44716 by exposing and forwarding `interpolate_pos_encoding` through the Pixio embedding/model call chain so the option is actually usable from `PixioModel.forward()`. ### Changes - Added `interpolate_pos_encoding:\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44480",
-    "created_at": "2026-03-05T20:54:07Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44723",
+    "created_at": "2026-03-15T16:52:03Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44480/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44480",
-    "labels": [],
-    "merged": true,
-    "number": 44480,
+    "files_url": "https://github.com/huggingface/transformers/pull/44723/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44723",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44723,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `diffusers` to CI docker file",
-    "updated_at": "2026-03-05T21:11:17Z"
+    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
+    "updated_at": "2026-03-18T15:05:52Z"
   },
   {
-    "additions": 116,
-    "author": "BenjaminBossan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Required fixes: - some code was using unordered data structures, making weight order random - adjust alpha to offset increased rank from fusion - import functions from PEFT if available See https://github.com/huggingface/peft/pull/3083.",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44478",
-    "created_at": "2026-03-05T17:19:31Z",
-    "deletions": 26,
+    "additions": 38,
+    "author": "chandan11248",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Migrates the GPT-J model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as described in #43979. ### Changes - Added `_can_record_outputs` to `GPTJPreTrained\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44722",
+    "created_at": "2026-03-15T15:33:25Z",
+    "deletions": 110,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44478/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44478",
+    "files_url": "https://github.com/huggingface/transformers/pull/44722/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44722",
     "labels": [],
-    "merged": true,
-    "number": 44478,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "[WIP] FIX Make Mixtral LoRA loading work",
-    "updated_at": "2026-03-11T17:44:20Z"
+    "merged": false,
+    "number": 44722,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor gptj output tracing to use standardized decorators",
+    "updated_at": "2026-03-19T18:12:59Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. It's quite a random rule to fix https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b to be honest",
+    "additions": 4,
+    "author": "rsmed31",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44716 `PixioPatchEmbeddings.forward` already accepted `interpolate_pos_encoding` but it was silently dropped \u2014 never passed from `PixioEmbeddings.forward` or `PixioModel.forward`, making the parameter effectively unusable\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44477",
-    "created_at": "2026-03-05T16:58:29Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44718",
+    "created_at": "2026-03-14T23:57:14Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44477/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44477",
+    "files_url": "https://github.com/huggingface/transformers/pull/44718/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44718",
     "labels": [],
     "merged": false,
-    "number": 44477,
+    "number": 44718,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[vllm compat] Fix remote code inits",
-    "updated_at": "2026-03-11T10:34:06Z"
+    "title": "Fix: propagate interpolate_pos_encoding through PixioEmbeddings and PixioModel",
+    "updated_at": "2026-03-15T17:58:58Z"
   },
   {
-    "additions": 4,
-    "author": "Rocketknight1",
+    "additions": 15,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "I made an oversight in the fix at #43981 - I didn't realize the dim order changed for torch, so the test was still flaky for torch tensors. The fix reduced the flaky frequency a lot so I thought it had been fixed, but actually it's still t\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As discussed internally, some component model classes didn't specify the correct config classes. This PR fixes them (those I could found - because the tiny model creation script fails due to those mistakes).",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44476",
-    "created_at": "2026-03-05T16:39:44Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44715",
+    "created_at": "2026-03-14T21:11:52Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44476/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44476",
+    "files_url": "https://github.com/huggingface/transformers/pull/44715/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44715",
     "labels": [],
     "merged": true,
-    "number": 44476,
+    "number": 44715,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Llava tests for torch too!",
-    "updated_at": "2026-03-11T16:47:05Z"
+    "title": "Fix missing / incorrect `config` class in some model class definitions",
+    "updated_at": "2026-03-15T11:19:51Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
-    "changed_files": 1,
+    "additions": 181,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating from core config to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but `text_config` still has default\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44475",
-    "created_at": "2026-03-05T16:29:18Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44714",
+    "created_at": "2026-03-14T20:42:46Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44475/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44475",
+    "files_url": "https://github.com/huggingface/transformers/pull/44714/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44714",
     "labels": [],
-    "merged": true,
-    "number": 44475,
+    "merged": false,
+    "number": 44714,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
-    "updated_at": "2026-03-09T22:33:20Z"
+    "title": "fix: propagate num_labels to text_config for Qwen models",
+    "updated_at": "2026-03-18T12:56:27Z"
   },
   {
-    "additions": 875,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/58. In the latest code, Qwen3VL and Qwen3.5 use the same `get_rope_index` func of Qwen2VL. But they should be different since Qwen3VL/Qwen3.5 introduce text timestamps. T\u2026",
-    "changed_files": 9,
+    "additions": 15,
+    "author": "kulkarni-rohan",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Applies the output tracing refactor to ColQwen2ForRetrieval as part of the broader effort tracked in issue #43979 to modernize output handling across all models in the library. Changes in both modular_colqwen2.py and modeling_colqwen2.py:\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44474",
-    "created_at": "2026-03-05T15:46:09Z",
-    "deletions": 107,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44713",
+    "created_at": "2026-03-14T20:20:14Z",
+    "deletions": 28,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44474/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44474",
+    "files_url": "https://github.com/huggingface/transformers/pull/44713/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44713",
     "labels": [],
-    "merged": true,
-    "number": 44474,
-    "review_comments_count": 10,
-    "state": "closed",
-    "title": "[Bugfix] fix video inference of qwen3vl and qwen3.5 series",
-    "updated_at": "2026-03-10T09:52:44Z"
+    "merged": false,
+    "number": 44713,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ColQwen2] Refactor output tracing (issue #43979)",
+    "updated_at": "2026-03-14T20:21:24Z"
   },
   {
-    "additions": 137,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? supersedes #44446 on `main`, when loading to cpu and using meta devices for non-rank0 processes, it now re-initializes weights on those processes as well as uses more CPU memory. In testing with loading llama3-8b. m\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? torch 2.11 is going to be released soon, but we still use 2.9. Let's update it to 2.10 so at least a run with torch 2.10, before we update to torch 2.11 later.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44473",
-    "created_at": "2026-03-05T14:52:15Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44712",
+    "created_at": "2026-03-14T20:18:01Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44473/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44473",
+    "files_url": "https://github.com/huggingface/transformers/pull/44712/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44712",
     "labels": [],
     "merged": true,
-    "number": 44473,
-    "review_comments_count": 4,
+    "number": 44712,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix FSDP loading with meta devices",
-    "updated_at": "2026-03-09T15:46:22Z"
+    "title": "Update Nvidia CI docker file to use torch 2.10",
+    "updated_at": "2026-03-14T20:29:04Z"
   },
   {
-    "additions": 13,
-    "author": "jblox26",
+    "additions": 339,
+    "author": "anuq",
     "author_association": "NONE",
-    "body_excerpt": "## What does this fix? Running video inference with any `Qwen3VL` model raises `StopIteration` during `model.generate()`: ``` File \".../transformers/models/qwen3_vl/modeling_qwen3_vl.py\", line 1126, in get_rope_index grid_thw = next(grid_i\u2026",
+    "body_excerpt": "## What does this PR do? Fixes #35141. When `tie_word_embeddings=False`, calling `resize_token_embeddings()` creates a new `nn.Linear` for the LM head via `_get_resized_lm_head()`. The new module's weight and bias tensors do **not** carry\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44711",
+    "created_at": "2026-03-14T19:21:21Z",
+    "deletions": 205,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44711/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44711",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44711,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: mark new lm_head params as `_is_hf_initialized` after `resize_token_embeddings`",
+    "updated_at": "2026-03-20T13:36:58Z"
+  },
+  {
+    "additions": 12,
+    "author": "he-yufeng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `AutoProcessor.from_pretrained` silently dropping hub kwargs like `force_download`, `cache_dir`, `token`, `revision`, etc. ### The bug The existing code on line ~300 filters kwargs using `inspect.signature(ca\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44472",
-    "created_at": "2026-03-05T14:50:06Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44710",
+    "created_at": "2026-03-14T18:33:53Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44472/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44472",
+    "files_url": "https://github.com/huggingface/transformers/pull/44710/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44710",
     "labels": [],
-    "merged": false,
-    "number": 44472,
+    "merged": true,
+    "number": 44710,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Qwen3VL get_rope_index StopIteration with per-frame video tokens",
-    "updated_at": "2026-03-06T15:15:58Z"
+    "title": "Fix AutoProcessor.from_pretrained silently dropping hub kwargs",
+    "updated_at": "2026-03-25T18:13:14Z"
   },
   {
-    "additions": 50,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44466 After `.to(device)`, PyTorch's `Module._apply` may create new `Parameter` objects that no longer share storage with tied weights. This caused `remove_tied_weights_from_state_dict` to fail to detect and\u2026",
-    "changed_files": 2,
+    "additions": 6778,
+    "author": "LucasMa2025",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# \ud83c\udf9b\ufe0f Add Configurable Generation Scheduler and State Machine for `generate()` ## Summary This PR introduces a **fully optional, zero-intrusion** Generation Scheduler (`GenerationScheduler`) and explicit state machine (`GenerationStateMachi\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44471",
-    "created_at": "2026-03-05T14:30:17Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44708",
+    "created_at": "2026-03-14T17:13:34Z",
+    "deletions": 7,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44708/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44708",
+    "labels": [],
+    "merged": false,
+    "number": 44708,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add Configurable Generation Scheduler and State Machine for `generate()`",
+    "updated_at": "2026-03-14T19:19:11Z"
+  },
+  {
+    "additions": 3,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `MptForCausalLM.forward()` and `MptModel.forward()`, bringing MPT in line with other CausalLM models. Same rationale as the Bloom PR (#44706) \u2014 M\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44707",
+    "created_at": "2026-03-14T17:12:16Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44471/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44471",
+    "files_url": "https://github.com/huggingface/transformers/pull/44707/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44707",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44471,
+    "number": 44707,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tied weights serialization being device-dependent",
-    "updated_at": "2026-03-06T14:03:18Z"
+    "title": "Add position_ids to MptForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:39:36Z"
   },
   {
-    "additions": 8,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44360 The reference `fp8_index` kernel clamps per-head q\u00b7k scores with `T.max(logits, 0)` before the weighted sum across heads ([kernel.py#L241](https://huggingface.co/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L241\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? Adds `position_ids` as an explicit parameter to `BloomForCausalLM.forward()` and `BloomModel.forward()`, bringing Bloom in line with other CausalLM models like Llama, Falcon, Gemma, and Mistral\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44470",
-    "created_at": "2026-03-05T14:02:05Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44706",
+    "created_at": "2026-03-14T17:09:11Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44470/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44470",
+    "files_url": "https://github.com/huggingface/transformers/pull/44706/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44706",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44470,
+    "number": 44706,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer",
-    "updated_at": "2026-03-05T15:39:38Z"
+    "title": "Add position_ids to BloomForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:39:51Z"
   },
   {
-    "additions": 4,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? For remote code that behave correctly with tied weights, we need to keep the same behavior as for the main lib, i.e. not remove them from tied weights (as tied weights are marked as missing to avoid inits!!)",
+    "additions": 14,
+    "author": "saivedant169",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes part of #32937 ## What does this PR do? RoFormer introduced rotary position embeddings, but its `ForCausalLM` forward method doesn't accept `position_ids` \u2014 which means callers can't specify custom positions for packed sequences or f\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44469",
-    "created_at": "2026-03-05T13:51:55Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44705",
+    "created_at": "2026-03-14T16:48:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44469/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44469",
-    "labels": [],
-    "merged": true,
-    "number": 44469,
+    "files_url": "https://github.com/huggingface/transformers/pull/44705/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44705",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44705,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[remote code/vllm] Fix incorrect tied weights",
-    "updated_at": "2026-03-05T15:07:56Z"
+    "title": "Add position_ids to RoFormerForCausalLM forward pass",
+    "updated_at": "2026-03-18T13:40:05Z"
   },
   {
-    "additions": 13,
-    "author": "itazap",
+    "additions": 26,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have added_tokens_decoder with specific token_ids, we need to overwrite them in spm model ! example: [UNUSED_TOKEN_146] -> <|im_start|> see internlm2: https://huggingfac\u2026",
-    "changed_files": 1,
+    "body_excerpt": "As per title, it seems that the `cute` subfolder can be even distributed if you only install FA2 which implies something wrong. Now we check under the (normalized) distribution names",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44468",
-    "created_at": "2026-03-05T13:48:56Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44703",
+    "created_at": "2026-03-14T14:46:02Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44468/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44468",
+    "files_url": "https://github.com/huggingface/transformers/pull/44703/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44703",
     "labels": [],
     "merged": true,
-    "number": 44468,
-    "review_comments_count": 0,
+    "number": 44703,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Replace placeholder tokens as specified in added_tokens_decoder",
-    "updated_at": "2026-03-05T16:29:13Z"
+    "title": "[`FA`] Fix fa detection",
+    "updated_at": "2026-03-14T17:19:07Z"
   },
   {
-    "additions": 346,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have `added_tokens_decoder` with specific token_ids, we need to overwrite them in spm model ! `example: [UNUSED_TOKEN_146] -> <|im_start|>` see internlm2: https://huggin\u2026",
-    "changed_files": 24,
+    "additions": 148,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR fix? The `rms_norm_eps` parameter in `MistralConfig` was incorrectly typed as `int | None` but defaults to `1e-6` which is a float. This parameter is passed to `MistralRMSNorm` which expects `eps: float`. ### Bug Detai\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44467",
-    "created_at": "2026-03-05T13:44:54Z",
-    "deletions": 204,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44702",
+    "created_at": "2026-03-14T14:41:15Z",
+    "deletions": 25,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44467/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44467",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44702/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44702",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44467,
+    "number": 44702,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Placeholder tokens update",
-    "updated_at": "2026-03-05T13:47:28Z"
+    "state": "closed",
+    "title": "fix: Correct rms_norm_eps type hint from int to float in MistralConfig",
+    "updated_at": "2026-03-18T13:00:12Z"
   },
   {
-    "additions": 20,
-    "author": "kashif",
+    "additions": 219,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix the loss calculation; we should calculate it on scaled targets. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with\u2026",
-    "changed_files": 2,
+    "body_excerpt": "These models have `base_model_pp_plan`s but currently do not work because the base model's forward pass depends on all the `layers` being `Qwen2VLDecoderLayer`. i.e. if one of the layers is removed/replaced with `Identity`, `decoder_layer.\u2026",
+    "changed_files": 52,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44465",
-    "created_at": "2026-03-05T12:59:23Z",
-    "deletions": 14,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44699",
+    "created_at": "2026-03-14T11:44:24Z",
+    "deletions": 148,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44465/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44465",
-    "labels": [
-      "bug"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44699/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44699",
+    "labels": [],
     "merged": true,
-    "number": 44465,
+    "number": 44699,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[timesfm2_5] fix loss scaling",
-    "updated_at": "2026-03-05T14:50:26Z"
+    "title": "Fix several based models' pipeline parallel support",
+    "updated_at": "2026-03-20T13:53:27Z"
   },
   {
-    "additions": 16,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44462 When a model's `model_type` (e.g. `\"llama\"`) has no entry in `TOKENIZER_MAPPING_NAMES`, `AutoTokenizer.from_pretrained` falls through to loading the tokenizer class declared in `tokenizer_config.json`\u2026",
+    "additions": 1,
+    "author": "hmellor",
+    "author_association": "MEMBER",
+    "body_excerpt": "The typo in the `elif` chain meant that `image` and `video` modalidty encoders could not be set using this method. This PR fixes the typo so that they can.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44463",
-    "created_at": "2026-03-05T12:45:57Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44698",
+    "created_at": "2026-03-14T11:18:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44463/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44463",
+    "files_url": "https://github.com/huggingface/transformers/pull/44698/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44698",
     "labels": [],
-    "merged": false,
-    "number": 44463,
+    "merged": true,
+    "number": 44698,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AutoTokenizer ignoring tokenizer.json for unregistered model types",
-    "updated_at": "2026-03-07T13:50:44Z"
+    "title": "Fix `set_encoder`",
+    "updated_at": "2026-03-14T13:42:00Z"
   },
   {
-    "additions": 12,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? for SP loss we do not have torch device mesh but rather a deepspeed only. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
-    "changed_files": 1,
+    "additions": 75,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description The `torch_float` function in `src/transformers/utils/generic.py` was incorrectly returning `int(x)` in two places where it should return `float(x)`: 1. When torch is not available (fallback case) 2. When not in a tracing co\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44461",
-    "created_at": "2026-03-05T11:39:02Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44697",
+    "created_at": "2026-03-14T10:44:12Z",
+    "deletions": 25,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44461/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44461",
+    "files_url": "https://github.com/huggingface/transformers/pull/44697/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44697",
     "labels": [],
-    "merged": true,
-    "number": 44461,
+    "merged": false,
+    "number": 44697,
     "review_comments_count": 1,
-    "state": "closed",
-    "title": "[Trainer] fix SP loss",
-    "updated_at": "2026-03-05T13:00:40Z"
+    "state": "open",
+    "title": "fix: torch_float should return float, not int",
+    "updated_at": "2026-03-17T19:29:02Z"
   },
   {
-    "additions": 1,
-    "author": "3outeille",
+    "additions": 19,
+    "author": "hmellor",
     "author_association": "MEMBER",
-    "body_excerpt": null,
+    "body_excerpt": "In configs, `base_model_pp_plan` and `base_model_tp_plan` default to `None` In models, `_pp_plan` and `_tp_plan` _look like_ they default to `None` based on the class variables, but will actually always be a dict because of `post_init`. Th\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44460",
-    "created_at": "2026-03-05T10:53:07Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44696",
+    "created_at": "2026-03-14T09:41:07Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44460/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44460",
+    "files_url": "https://github.com/huggingface/transformers/pull/44696/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44696",
     "labels": [],
     "merged": true,
-    "number": 44460,
-    "review_comments_count": 0,
+    "number": 44696,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "trigger tensor parallel utils test in the CI",
-    "updated_at": "2026-03-05T11:25:51Z"
+    "title": "Fix `supports_{tp/pp}_plan`",
+    "updated_at": "2026-03-31T13:12:56Z"
   },
   {
-    "additions": 95,
-    "author": "weiguangli-io",
+    "additions": 4,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44458 PR #42848 introduced a regression where `torch.compile` on `MllamaForConditionalGeneration` fails with a C++ compile error from the torch inductor backend (`'tmp2' was not declared in this scope`). The root cause is\u2026",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Kyutai Speech-To-Text**: [The PR [processors] Unbloating simple processors](https://github.com/huggingface/transformers/pull/40377), [refactore\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44459",
-    "created_at": "2026-03-05T07:58:28Z",
-    "deletions": 7,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44695",
+    "created_at": "2026-03-14T09:05:35Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44459/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44459",
+    "files_url": "https://github.com/huggingface/transformers/pull/44695/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44695",
     "labels": [],
     "merged": false,
-    "number": 44459,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "fix: make Mllama cross attention mask compatible with torch.compile",
-    "updated_at": "2026-03-07T13:50:40Z"
+    "number": 44695,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "fix(testing): Fix Kyutai Speech-To-Text and LongCatFlash test failures on main CI",
+    "updated_at": "2026-04-02T14:51:41Z"
   },
   {
-    "additions": 1,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [DepthProModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453624#step:14:4893). <img width=\"2231\" height=\"99\" alt=\"image\" src=\"https://github.com\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44456",
-    "created_at": "2026-03-05T06:01:06Z",
-    "deletions": 1,
+    "additions": 143,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagated from core config to text config. When loading `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the outer config gets `num_labels=1` but the inner `text_config` still ha\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44693",
+    "created_at": "2026-03-14T05:43:00Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44456/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44456",
+    "files_url": "https://github.com/huggingface/transformers/pull/44693/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44693",
     "labels": [],
-    "merged": true,
-    "number": 44456,
+    "merged": false,
+    "number": 44693,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failing `DepthProModelIntegrationTest`",
-    "updated_at": "2026-03-05T14:52:40Z"
+    "title": "fix: Propagate num_labels to text_config in Qwen3.5",
+    "updated_at": "2026-03-18T12:56:25Z"
   },
   {
-    "additions": 3,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Check if accelerator exists before using `pin_memory`. reproduce it on a CPU only node: `python examples/pytorch/continuous_batching_simple.py` output: ``` File \"/home/jiqingfe/transformers/src/transformers/generation/continuous_batching/i\u2026",
-    "changed_files": 1,
+    "additions": 18,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44514. `Qwen2_5_VLProcessor.apply_chat_template` crashes with `ValueError` when called with batched input and `padding=False` (the default). The root cause is `np.array(text_inputs[\"input_ids\"])` which fails when sequence\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44455",
-    "created_at": "2026-03-05T05:20:13Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44692",
+    "created_at": "2026-03-14T04:14:38Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44455/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44455",
-    "labels": [],
-    "merged": true,
-    "number": 44455,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44692/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44692",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44692,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix pin_memory for contiguous batching",
-    "updated_at": "2026-03-09T13:49:30Z"
+    "title": "fix: handle ragged input_ids in Qwen2_5_VLProcessor.apply_chat_template",
+    "updated_at": "2026-03-18T12:44:18Z"
   },
   {
-    "additions": 17,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Fixes the `_init_weights` method in `PegasusPreTrainedModel` and `MarianPreTrainedModel` to handle sinusoidal position embeddings before calling `super()._init_weights(module)`, preventing the generic `nn.Embedding` branch fro\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44454",
-    "created_at": "2026-03-05T03:51:38Z",
-    "deletions": 7,
+    "additions": 23,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes `num_labels` (and `id2label`/`label2id`) not being propagated from the outer `Qwen3_5Config` to its inner `text_config` when passed via `AutoConfig.from_pretrained(..., num_labels=1)`. - When `text_config` is `None` or a\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44691",
+    "created_at": "2026-03-14T04:10:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44454/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44454",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44691/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44691",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44454,
+    "number": 44691,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Pegasus sinusoidal position embedding init regression in v5",
-    "updated_at": "2026-03-09T02:17:41Z"
+    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
+    "updated_at": "2026-03-18T12:57:19Z"
   },
   {
-    "additions": 1,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Fix KeyError in `convert_to_native_format` for dict vocab Fixes #44451 ### Problem `AutoTokenizer.from_pretrained(\"vesteinn/ScandiBERT\")` raises `KeyError: 0` in `convert_to_native_format`. ScandiBERT's `tokenizer_config.json` specifies\u2026",
-    "changed_files": 1,
+    "additions": 6,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44360 The `GlmMoeDsaIndexer` is missing a ReLU activation on the per-head dot-product scores before the weighted sum across heads. The reference DeepSeek V3.2 implementation applies ReLU inside the `fp8_index` kernel: ```\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44452",
-    "created_at": "2026-03-05T03:34:02Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44690",
+    "created_at": "2026-03-14T03:44:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44452/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44452",
-    "labels": [],
-    "merged": true,
-    "number": 44452,
+    "files_url": "https://github.com/huggingface/transformers/pull/44690/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44690",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44690,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix KeyError in convert_to_native_format for dict vocab",
-    "updated_at": "2026-03-19T13:59:23Z"
+    "title": "Fix missing ReLU in GLM-MOE-DSA indexer scoring",
+    "updated_at": "2026-03-18T12:40:23Z"
   },
   {
-    "additions": 297,
-    "author": "sandesh-bhandari-dev",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 32,
+    "additions": 141,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes issue #44625: Qwen3.5 num_labels not propagating to text_config. When calling `AutoConfig.from_pretrained(\"Qwen3.5\", num_labels=1)`, the main config gets `num_labels=1` but text_config still has default `num_labels=2`. Thi\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44449",
-    "created_at": "2026-03-05T01:40:47Z",
-    "deletions": 319,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44688",
+    "created_at": "2026-03-14T00:40:50Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44449/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44449",
+    "files_url": "https://github.com/huggingface/transformers/pull/44688/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44688",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44449,
+    "number": 44688,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: 3 bugs : MoE aux loss, ANSI TTY leak, pipeline removed and also task error",
-    "updated_at": "2026-03-05T13:22:40Z"
+    "title": "fix: Propagate num_labels to text_config in Qwen models",
+    "updated_at": "2026-03-18T12:56:41Z"
   },
   {
     "additions": 8,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes MiniCPM-o-2_6 related tests failures in vLLM, and improve backward compatibility with remote code in general. Cc @hmellor @zucchini-nlp",
+    "author": "vxa8502",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes partial #32937 Adds explicit `position_ids` threading through GPT-Neo's attention layers to enable flash attention's packed sequence optimization. ## Context GPT-Neo uses learned absolute position embeddings (`wpe`) applied at the mo\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44447",
-    "created_at": "2026-03-04T21:55:16Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44687",
+    "created_at": "2026-03-13T23:28:55Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44447/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44447",
-    "labels": [],
-    "merged": true,
-    "number": 44447,
+    "files_url": "https://github.com/huggingface/transformers/pull/44687/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44687",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44687,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[vLLM] Fix backward compatibility with hardcoded subprocessors classes in processors",
-    "updated_at": "2026-03-05T16:07:28Z"
+    "title": "Add explicit position_ids to GPT-Neo attention layers",
+    "updated_at": "2026-03-18T13:06:49Z"
   },
   {
-    "additions": 4,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes fsdp loading for rank!=0 as they should stay on meta device. This was reverted in a PR that I can find anymore.",
-    "changed_files": 1,
+    "additions": 615,
+    "author": "tejasae-afk",
+    "author_association": "NONE",
+    "body_excerpt": "During an automated code review of src/transformers/models/marian/convert_marian_to_pytorch.py, the following issue was identified. Use safe_load in convert marian to pytorch. yaml.load on untrusted input can construct arbitrary Python obj\u2026",
+    "changed_files": 80,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44446",
-    "created_at": "2026-03-04T21:38:00Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44686",
+    "created_at": "2026-03-13T21:22:07Z",
+    "deletions": 259,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44446/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44446",
+    "files_url": "https://github.com/huggingface/transformers/pull/44686/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44686",
     "labels": [],
     "merged": false,
-    "number": 44446,
+    "number": 44686,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix fdsp loading meta device",
-    "updated_at": "2026-03-05T15:07:57Z"
+    "title": "Use safe_load in convert marian to pytorch",
+    "updated_at": "2026-03-14T03:54:31Z"
   },
   {
-    "additions": 2282,
-    "author": "gabe-l-hart",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for the forthcoming Granite Docling model based on the Granite 4 LLM architecture (`GraniteMoeHybrid`). ## Draft Status This PR is in draft pending the possibility of some additional changes: -\u2026",
-    "changed_files": 8,
+    "additions": 10,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? For tiny model creation script - new added model test files still miss this argument ...",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44445",
-    "created_at": "2026-03-04T20:54:17Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44445/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44445",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44685",
+    "created_at": "2026-03-13T20:53:41Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44685/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44685",
     "labels": [],
-    "merged": false,
-    "number": 44445,
-    "review_comments_count": 44,
-    "state": "open",
-    "title": "Adding support for GraniteDoclingHybrid",
-    "updated_at": "2026-03-11T19:19:44Z"
+    "merged": true,
+    "number": 44685,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix more model tester missing `parent` issue",
+    "updated_at": "2026-03-13T21:03:46Z"
   },
   {
-    "additions": 7,
-    "author": "harshaljanjani",
+    "additions": 41,
+    "author": "ntenenz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing use case was identified and fixed in this PR: \u2192 [TOKENIZER_MAPPING_NAMES](https://github.com/huggingface/transformers/blob/main/src/transformers/models/auto/tokenization_auto.py#L63-L338) doe\u2026",
+    "body_excerpt": "\u2026 # What does this PR do? In torch versions >= 2.9.0, it requests the lse from flex_attenetion using `AuxRequest` instead of the deprecated `return_lse`, which triggers a warning and can break tracing. Fixes #44683 ## Before submitting - [\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44443",
-    "created_at": "2026-03-04T20:01:55Z",
-    "deletions": 7,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44684",
+    "created_at": "2026-03-13T20:16:35Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44443/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44443",
+    "files_url": "https://github.com/huggingface/transformers/pull/44684/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44684",
     "labels": [],
     "merged": true,
-    "number": 44443,
-    "review_comments_count": 0,
+    "number": 44684,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "fix(tokenizer): Only strip Fast from class names in AutoTokenizer if used as a suffix",
-    "updated_at": "2026-03-09T15:03:49Z"
+    "title": "update flex attention to use `return_aux` instead of `return_lse` when torch verison >= 2.9",
+    "updated_at": "2026-03-18T11:44:18Z"
   },
   {
-    "additions": 37,
-    "author": "NielsRogge",
+    "additions": 301,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes the training of LW-DETR. It turned out that the model was not able to overfit a single batch. Hence I asked Codex to investigate this. It turns out there were 3 bugs: 1. A logits calibration gap, cause\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Llama cpp integration in transformers serve. Minor changes to add llama.cpp integration Mostly changes on serve to fix latency for streaming and non streaming",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44441",
-    "created_at": "2026-03-04T19:54:20Z",
-    "deletions": 48,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44682",
+    "created_at": "2026-03-13T18:52:41Z",
+    "deletions": 73,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44441/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44441",
+    "files_url": "https://github.com/huggingface/transformers/pull/44682/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44682",
     "labels": [],
-    "merged": true,
-    "number": 44441,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "[LW-DETR] Fix training",
-    "updated_at": "2026-03-09T15:36:02Z"
+    "merged": false,
+    "number": 44682,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "transformers serve + llamacpp",
+    "updated_at": "2026-03-14T07:05:29Z"
   },
   {
-    "additions": 11,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes the noisy `HfHubHTTPError` exception output that appears when loading a transformer model from a repository that has discussions disabled. ### Root cause The `previous_pr()` function in `safetensors_conversio\u2026",
+    "additions": 47,
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #44679 ## Summary - Custom attention kernels registered via `load_and_register_attn_kernel` currently get hardcoded `flash_attention_2` mask dispatch, which produces 2D or `None` masks - Kernels that need SDPA-style 4D boolean masks\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44440",
-    "created_at": "2026-03-04T18:31:13Z",
-    "deletions": 7,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44680",
+    "created_at": "2026-03-13T17:55:54Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44440/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44440",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44680/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44680",
+    "labels": [],
     "merged": false,
-    "number": 44440,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: catch HfHubHTTPError in safetensors auto_conversion thread",
-    "updated_at": "2026-03-05T15:39:11Z"
+    "number": 44680,
+    "review_comments_count": 12,
+    "state": "open",
+    "title": "Allow kernel modules to declare their preferred mask function",
+    "updated_at": "2026-03-19T11:27:09Z"
   },
   {
-    "additions": 5,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes this failing [ProphetNetModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453490#step:14:2331). <img width=\"2303\" height=\"165\" alt=\"image\" src=\"https://github.\u2026",
+    "additions": 9,
+    "author": "JokeYoonic",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Problem: - On macOS ARM64 + Python 3.13 + transformers 5.x, GPT-2 model's lm_head forward pass produces NaN/Inf values during inference - Root cause: lm_head.weight is tied to transformer.wte.weight, and the shared memory reference causes\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43453-9",
-    "cluster_ids": [
-      "cluster-43453-9"
-    ],
-    "cluster_role": "member",
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44439",
-    "created_at": "2026-03-04T16:55:50Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44439/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44439",
-    "labels": [],
-    "merged": true,
-    "number": 44439,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix failing `ProphetNetModelIntegrationTest`",
-    "updated_at": "2026-03-05T15:43:59Z"
-  },
-  {
-    "additions": 135,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds flashoptim from databricks team into Trainer ! cc @tomaarsen ### Results ``` Optimizer Loss Time Speed Memory \u0394 Mem \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 AdamW Fused 1.4\u2026",
-    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44438",
-    "created_at": "2026-03-04T16:31:35Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44676",
+    "created_at": "2026-03-13T16:28:01Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44438/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44438",
+    "files_url": "https://github.com/huggingface/transformers/pull/44676/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44676",
     "labels": [],
     "merged": false,
-    "number": 44438,
+    "number": 44676,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add flashoptim",
-    "updated_at": "2026-03-04T18:52:42Z"
+    "title": "fix(gpt2): Resolve NaN/Inf issue in lm_head on Python 3.13 with tied weights",
+    "updated_at": "2026-03-18T17:16:49Z"
   },
   {
-    "additions": 150,
-    "author": "ArthurZucker",
+    "additions": 32,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "body_excerpt": "properly formats the `ContinuousBatchingConfig` below: <img width=\"976\" height=\"626\" alt=\"Screenshot 2026-03-13 at 9 09 39 AM\" src=\"https://github.com/user-attachments/assets/4390c6f7-bb63-4039-a46e-9f4ae23f5d98\" />",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44437",
-    "created_at": "2026-03-04T15:34:34Z",
-    "deletions": 134,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44675",
+    "created_at": "2026-03-13T16:10:28Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44437/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44437",
+    "files_url": "https://github.com/huggingface/transformers/pull/44675/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44675",
     "labels": [],
     "merged": true,
-    "number": 44437,
-    "review_comments_count": 5,
+    "number": 44675,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "protect imports",
-    "updated_at": "2026-03-04T16:19:49Z"
+    "title": "[docs] cb config",
+    "updated_at": "2026-03-13T23:15:04Z"
   },
   {
-    "additions": 8,
-    "author": "jw9603",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44423 `continuous_batching_chat_completion` was missing input preprocessing and `tokenize=True` in `apply_chat_template`, causing `'str' object has no attribute 'to'` for multimodal models. Added the same `get_model_modality` + `get\u2026",
-    "changed_files": 1,
+    "additions": 408,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "We've had `parse_response()` in the library for a while, but it's been a soft launch / prototype feature. This PR cleans it up and documents it, making it an official feature! The API is largely unchanged from the prototype, but we drop `x\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44436",
-    "created_at": "2026-03-04T15:26:48Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44674",
+    "created_at": "2026-03-13T15:41:42Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44436/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44436",
+    "files_url": "https://github.com/huggingface/transformers/pull/44674/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44674",
     "labels": [],
     "merged": true,
-    "number": 44436,
-    "review_comments_count": 4,
+    "number": 44674,
+    "review_comments_count": 11,
     "state": "closed",
-    "title": "Fix continuous batching for multimodal models",
-    "updated_at": "2026-03-09T13:58:37Z"
+    "title": "Officially launch parse_response",
+    "updated_at": "2026-03-24T15:55:05Z"
   },
   {
-    "additions": 434,
+    "additions": 73,
     "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds the option to have a ContinuousBatchingManager not be destroyed after generation is over. This allows the user to re-use the manager without requiring him to know any other entry point for CB apart from `generate_batch` Requir\u2026",
-    "changed_files": 5,
+    "body_excerpt": "This PR fixes a bug in continuous batching where non-CUDA devices cannot use the feature because some CUDA-exclusive objects are always instantiated. It also adds a test to make sure this will not break again in the future.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44435",
-    "created_at": "2026-03-04T14:17:08Z",
-    "deletions": 268,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44435/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44435",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44673",
+    "created_at": "2026-03-13T15:37:01Z",
+    "deletions": 15,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44673/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44673",
     "labels": [],
-    "merged": false,
-    "number": 44435,
+    "merged": true,
+    "number": 44673,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[CB] Persistent manager",
-    "updated_at": "2026-03-04T14:17:15Z"
+    "state": "closed",
+    "title": "[CB] [Bug] Fix crashes when running without cuda",
+    "updated_at": "2026-03-15T23:59:55Z"
   },
   {
-    "additions": 413,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR adds a dedicated config for continuous batching, which is starting to have a lot parameters. This will give the user a clear view of what is possible and make adding new parameters easier. No breaking changes through `account_for_c\u2026",
-    "changed_files": 9,
+    "additions": 1,
+    "author": "neo",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44434",
-    "created_at": "2026-03-04T13:49:05Z",
-    "deletions": 303,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44672",
+    "created_at": "2026-03-13T15:33:15Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44434/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44434",
+    "files_url": "https://github.com/huggingface/transformers/pull/44672/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44672",
     "labels": [],
     "merged": true,
-    "number": 44434,
-    "review_comments_count": 12,
+    "number": 44672,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Add dedicated config",
-    "updated_at": "2026-03-13T13:56:40Z"
+    "title": "Fix annotations reader for python 3.14 in `PreTrainedModel`",
+    "updated_at": "2026-03-19T13:30:48Z"
   },
   {
-    "additions": 177,
-    "author": "leopold-tzafon",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Instead of silently failing when mm_token_type_ids is not passed, derives it in Qwen3 and Qwen3.5. Same as it was before: https://github.com/huggingface/transformers/commit/c281a2de8998e66e93fac30a236225528531df9b P\u2026",
-    "changed_files": 18,
+    "additions": 6,
+    "author": "dacorvo",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes #44678 ## Summary - Replace advanced indexing (`self.gate_up_proj[expert_ids]`) with explicit `torch.index_select(self.gate_up_proj, 0, expert_ids)` in `batched_mm_experts_forward` - 6 replacements total (3 weight tensors + 3 bias te\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44433",
-    "created_at": "2026-03-04T13:46:14Z",
-    "deletions": 61,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44669",
+    "created_at": "2026-03-13T14:52:22Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44433/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44433",
+    "files_url": "https://github.com/huggingface/transformers/pull/44669/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44669",
     "labels": [],
-    "merged": true,
-    "number": 44433,
+    "merged": false,
+    "number": 44669,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: raise error if mm_token_type_ids not supplied ",
-    "updated_at": "2026-03-12T17:12:47Z"
+    "title": "Use `index_select` instead of advanced indexing in `batched_mm_experts_forward`",
+    "updated_at": "2026-03-19T13:39:23Z"
   },
   {
-    "additions": 85,
-    "author": "zucchini-nlp",
+    "additions": 18,
+    "author": "dacorvo",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, instead of having to divide image token by norm scale, we can do it same way as in other model (eg. gemma3) and add a custom embed layer. It should be 100% BC because users usually call `self.embed_tok\u2026",
-    "changed_files": 8,
+    "body_excerpt": "Fixes #44677 ## Summary - Add `base_model_tp_plan` to `OlmoeConfig`, enabling `from_pretrained(tp_plan=\"auto\")` for OLMoE models - Add `TensorParallelTesterMixin` to OLMoE tests for TP validation coverage - Uses `\"colwise\"` for `q_norm` an\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44432",
-    "created_at": "2026-03-04T10:04:40Z",
-    "deletions": 38,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44668",
+    "created_at": "2026-03-13T14:45:22Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44432/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44432",
+    "files_url": "https://github.com/huggingface/transformers/pull/44668/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44668",
     "labels": [],
     "merged": true,
-    "number": 44432,
-    "review_comments_count": 0,
+    "number": 44668,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Make paligemma embed tokens standard",
-    "updated_at": "2026-03-11T08:38:41Z"
+    "title": "Add `base_model_tp_plan` to `OlmoeConfig`",
+    "updated_at": "2026-03-26T13:58:58Z"
   },
   {
-    "additions": 2931,
+    "additions": 412,
     "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-opening back a PR on cleaning up clip-like model's backbones. Let's merge it now, I've been seeing quite a lot of ppl reporting it and I am not sure when it will be resolved by the big vision refactor Basically,\u2026",
-    "changed_files": 36,
+    "body_excerpt": "# What does this PR do? A few find-and-replaces and cache position is deleted from the rest of models. Still have to check docs and test files, so WIP",
+    "changed_files": 120,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44431",
-    "created_at": "2026-03-04T10:02:13Z",
-    "deletions": 1781,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44667",
+    "created_at": "2026-03-13T14:37:26Z",
+    "deletions": 1519,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44431/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44431",
+    "files_url": "https://github.com/huggingface/transformers/pull/44667/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44667",
     "labels": [],
     "merged": false,
-    "number": 44431,
-    "review_comments_count": 49,
-    "state": "open",
-    "title": "Refactor CLIP-like models",
-    "updated_at": "2026-03-25T21:26:08Z"
+    "number": 44667,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Goodbye cache position",
+    "updated_at": "2026-03-19T11:55:04Z"
   },
   {
-    "additions": 0,
-    "author": "Rohang2005",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? This PR fixes an inconsistency in the AFMoE module where `past_key_values` was passed to a function argument expecting `past_key_value`. The function signature expects a singular cache object (`past_key_value`), bu\u2026",
-    "changed_files": 0,
+    "additions": 17,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - 2 model tester classes didn't follow the usual way we do things, which cause the tiny model creation script to fail with those model classes. - (the script initializes instances of model testers, in order to call\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44430",
-    "created_at": "2026-03-04T08:13:38Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44666",
+    "created_at": "2026-03-13T14:24:50Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44430/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44430",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44430,
+    "files_url": "https://github.com/huggingface/transformers/pull/44666/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44666",
+    "labels": [],
+    "merged": true,
+    "number": 44666,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix inconsistent past_key_value/past_key_values usage in AFMoE modeling",
-    "updated_at": "2026-03-04T14:07:32Z"
+    "title": "Another (small) set of fixes required for tiny model creation",
+    "updated_at": "2026-03-13T17:20:52Z"
   },
   {
-    "additions": 14,
-    "author": "thakoreh",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module was using `PALETTE['italic']` and `PALETTE['bold']` directly in string formatting, which caused ANSI escape codes to be emitted even when stdout is not connected to a terminal (e.g., when\u2026",
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Dinov3 vit was refactored to introduce a module between top level and layers to have the capture decorators work as intended. Otherwise, it would force the backbone to do manual collection. This introduced a small conversion which is now a\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44429",
-    "created_at": "2026-03-04T07:47:02Z",
-    "deletions": 6,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44665",
+    "created_at": "2026-03-13T13:50:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44429/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44429",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44429,
+    "files_url": "https://github.com/huggingface/transformers/pull/44665/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44665",
+    "labels": [],
+    "merged": true,
+    "number": 44665,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ANSI codes emitted in loading_report when stdout is not a TTY",
-    "updated_at": "2026-03-04T13:58:46Z"
+    "title": "[`Chmv2`] Fix conversion after capture refactor",
+    "updated_at": "2026-03-13T14:28:33Z"
   },
   {
-    "additions": 10,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
-    "changed_files": 2,
+    "additions": 12,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/44625 We can create a clf model with LLM or VLM backbone like: ```python from transformers import AutoConfig, AutoModelForSequenceClassification model_name = \"onnx-internal-testing/t\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44428",
-    "created_at": "2026-03-04T07:41:20Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44664",
+    "created_at": "2026-03-13T13:39:52Z",
+    "deletions": 182,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44428/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44428",
+    "files_url": "https://github.com/huggingface/transformers/pull/44664/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44664",
     "labels": [],
-    "merged": true,
-    "number": 44428,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Add XPU Expectations for vibe voice acoustic tokenizer tests",
-    "updated_at": "2026-03-16T08:53:42Z"
+    "merged": false,
+    "number": 44664,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Generic Sequence Classifier works for multimodal models",
+    "updated_at": "2026-03-13T15:41:29Z"
   },
   {
-    "additions": 43,
-    "author": "Jaredw2289-svg",
+    "additions": 1,
+    "author": "Ker102",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #44297 ## Problem `tokenizer.save_pretrained()` overwrites `tokenizer_class` in `tokenizer_config.json` with the current wrapper class (e.g. `PreTrainedTokenizerFast`) instead of preserving the original class from the loaded config (\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Fixes #44661 \u2014 `transformers add-new-model-like` crashes with `AttributeError: 'ModelInfos' object has no attribute 'tokenizer_class'` when selecting a model that is in `TOKENIZER_MAPPING_NAMES`. ## Root Cause PR #40936 refactor\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44427",
-    "created_at": "2026-03-04T06:03:56Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44663",
+    "created_at": "2026-03-13T13:25:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44427/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44427",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44663/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44663",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44427,
+    "number": 44663,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(tokenization): preserve original tokenizer_class in save_pretrained",
-    "updated_at": "2026-03-11T02:59:12Z"
+    "title": "fix: restore missing `tokenizer_class` attribute in `ModelInfos.__init__`",
+    "updated_at": "2026-03-13T14:02:00Z"
   },
   {
-    "additions": 29,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil Can you help review? Thx!",
-    "changed_files": 1,
+    "additions": 7084,
+    "author": "CyrilSterling",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR supports PenguinVL model. Paper: https://arxiv.org/abs/2603.06569 Github repo: https://github.com/tencent-ailab/Penguin-VL HuggingFace Model: https://huggingface.co/collections/tencent/ai-lab ## Before submi\u2026",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44426",
-    "created_at": "2026-03-04T05:57:34Z",
-    "deletions": 10,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44662",
+    "created_at": "2026-03-13T13:02:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44426/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44426",
+    "files_url": "https://github.com/huggingface/transformers/pull/44662/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44662",
     "labels": [],
-    "merged": true,
-    "number": 44426,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "update the expected output for qwen2_5_vl w/ pytorch 2.10 XPU",
-    "updated_at": "2026-03-04T09:55:55Z"
+    "merged": false,
+    "number": 44662,
+    "review_comments_count": 95,
+    "state": "open",
+    "title": "[model] Add PenguinVL implementation",
+    "updated_at": "2026-04-01T10:53:25Z"
   },
   {
-    "additions": 1,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "I believe the second `if` should be `elif` so the else branch only triggers when neither the string-truncation NOR the float-formatting conditions apply. Otherwise it overwrites the truncation message with the original long string.",
+    "additions": 5,
+    "author": "DogWala",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44637 This PR makes the PEFT `load_best_model_at_end` path in `Trainer` use a CPU-first adapter reload path during best-model loading. Previously, when training a PEFT model, `Trainer` could reload the best a\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44425",
-    "created_at": "2026-03-04T02:48:00Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44660",
+    "created_at": "2026-03-13T12:59:25Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44425/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44425",
+    "files_url": "https://github.com/huggingface/transformers/pull/44660/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44660",
     "labels": [],
     "merged": false,
-    "number": 44425,
+    "number": 44660,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Fix conditional check for float formatting",
-    "updated_at": "2026-03-04T02:48:41Z"
+    "title": "Fix: avoid late CUDA OOM in load_best_model_at_end with PEFT models",
+    "updated_at": "2026-03-13T13:18:52Z"
   },
   {
-    "additions": 6,
-    "author": "jw9603",
+    "additions": 0,
+    "author": "Olexandr88",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes `AttributeError: 'str' object has no attribute 'to'` when using `transformers serve --continuous-batching` with multimodal models like Qwen3.5-9B. `processor.apply_chat_template()` returns a plain string (not\u2026",
+    "body_excerpt": "Removes outdated use_diff entry from the docstring. The parameter is not present in the method signature or implementation.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44424",
-    "created_at": "2026-03-04T00:56:08Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44659",
+    "created_at": "2026-03-13T11:08:13Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44424/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44424",
+    "files_url": "https://github.com/huggingface/transformers/pull/44659/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44659",
     "labels": [],
     "merged": false,
-    "number": 44424,
+    "number": 44659,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix `transformers serve --continuous-batching` for multimodal models",
-    "updated_at": "2026-03-05T09:16:25Z"
+    "state": "open",
+    "title": "docs: remove outdated use_diff docstring from DistributedConfig.to_js\u2026",
+    "updated_at": "2026-03-13T13:42:29Z"
   },
   {
-    "additions": 117,
-    "author": "mitre88",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds a Spanish (es) translation of the `conversations.md` guide, which covers the fundamentals of using chat models in Transformers. ### Translated sections: - Chat CLI usage - TextGenerationPipeline in chat mode -\u2026",
+    "additions": 18,
+    "author": "Charly21r",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an issue where the `.modelcard` attribute of a pipeline is not initialized. Without this fix, calling `save_pretrained` on a pipeline (e.g., ASR pipeline) raises an `AttributeError` because `.modelcard` does n\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44422",
-    "created_at": "2026-03-04T00:42:43Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44658",
+    "created_at": "2026-03-13T10:36:22Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44422/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44422",
+    "files_url": "https://github.com/huggingface/transformers/pull/44658/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44658",
     "labels": [],
-    "merged": true,
-    "number": 44422,
+    "merged": false,
+    "number": 44658,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: add Spanish translation for conversations.md (chat basics)",
-    "updated_at": "2026-03-04T16:45:24Z"
+    "title": "Fix: fix Pipeline's save_pretrained method (issue #44655)",
+    "updated_at": "2026-03-13T14:08:27Z"
   },
   {
-    "additions": 309,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? When we shard weights according to a TP plan, we do not update the corresponding parent module attributes. For instance if we shard the weight of a `torch.nn.Linear`, we should also update its `in_features` or `out_\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44421",
-    "created_at": "2026-03-03T22:51:47Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44421/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44421",
+    "additions": 1,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review. Thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44657",
+    "created_at": "2026-03-13T10:25:07Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44657/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44657",
     "labels": [],
     "merged": true,
-    "number": 44421,
+    "number": 44657,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update parent module attributes when sharding with TP",
-    "updated_at": "2026-03-05T23:32:06Z"
+    "title": "fix bug embedding_size mismatch with hidden_size in electra model test",
+    "updated_at": "2026-04-01T08:24:54Z"
   },
   {
-    "additions": 249,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "- removes \"Number of accelerators\" section from \"Accelerator selection\" guide since this is probably pretty commonly known - add a new \"DDP\" guide - refactored \"Accelerate\" guide with a more focused overview of what it is and how to config\u2026",
-    "changed_files": 5,
+    "additions": 80,
+    "author": "juliendenize",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR add support to mistral-common v1.10.0. This involves: - reasoning effort feature - Python 3.14 Also add `image_sizes` in `apply_chat_template` output to match what is expected by standard processor.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44420",
-    "created_at": "2026-03-03T22:41:59Z",
-    "deletions": 250,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44656",
+    "created_at": "2026-03-13T10:24:11Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44420/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44420",
+    "files_url": "https://github.com/huggingface/transformers/pull/44656/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44656",
     "labels": [],
-    "merged": false,
-    "number": 44420,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[docs] distributed training",
-    "updated_at": "2026-03-11T17:36:12Z"
+    "merged": true,
+    "number": 44656,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "[MistralCommonBackend] Upgrade mistral-common to v1.10.0",
+    "updated_at": "2026-03-16T12:46:29Z"
   },
   {
-    "additions": 6,
-    "author": "michaelbenayoun",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? To be merged after #44302 and https://github.com/huggingface/kernels/pull/285. It adds the `neuron` device in checks for custom kernels, enabling to load kernels for Neuron devices.",
+    "additions": 13,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR fixes the AWQ test suite to align with the GPTQModel migration (following #41567 and #42776). ### Changes - **Fix `replace_with_awq_linear` return value**: The function now returns the model directly instead of a tuple `(model, _)`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44417",
-    "created_at": "2026-03-03T20:15:26Z",
-    "deletions": 6,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44654",
+    "created_at": "2026-03-13T07:31:19Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44417/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44417",
+    "files_url": "https://github.com/huggingface/transformers/pull/44654/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44654",
     "labels": [],
     "merged": true,
-    "number": 44417,
+    "number": 44654,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Neuron kernels integration",
-    "updated_at": "2026-03-05T17:09:39Z"
+    "title": "Fix AWQ tests for GPTQModel migration",
+    "updated_at": "2026-03-13T16:28:14Z"
   },
   {
-    "additions": 1,
-    "author": "tyler-romero",
+    "additions": 18,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Register `olmo_hybrid` in `TOKENIZER_MAPPING_NAMES` so auto-tokenizer resolution works, matching the other auto-registrations already in place for this model.",
+    "body_excerpt": "@zucchini-nlp, can you help review? Thx! unit tests to reproduce this bug: `tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py::Phi4MultimodalIntegrationTest::test_audio_text_generation`",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44416",
-    "created_at": "2026-03-03T19:30:56Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44653",
+    "created_at": "2026-03-13T07:14:25Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44416/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44416",
+    "files_url": "https://github.com/huggingface/transformers/pull/44653/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44653",
     "labels": [],
-    "merged": true,
-    "number": 44416,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44653,
+    "review_comments_count": 7,
     "state": "closed",
-    "title": "[tiny] Add olmo_hybrid to tokenizer auto-mapping",
-    "updated_at": "2026-03-04T19:26:10Z"
+    "title": "Fix `AutoImageProcessor` to correctly detect local implementation whe\u2026",
+    "updated_at": "2026-03-20T10:33:32Z"
   },
   {
-    "additions": 2,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR removes @MekkCyber from the PR template. cc @Rocketknight1 you only need to ping me now ;)",
+    "additions": 8,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44623 `processor.save_pretrained()` in v5 only saves the unified `processor_config.json` with nested sub-processor configs, but does not save standalone config files like `preprocessor_config.json` for the image processor\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44415",
-    "created_at": "2026-03-03T16:59:08Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44652",
+    "created_at": "2026-03-13T05:38:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44415/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44415",
-    "labels": [],
-    "merged": true,
-    "number": 44415,
+    "files_url": "https://github.com/huggingface/transformers/pull/44652/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44652",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44652,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update PR template",
-    "updated_at": "2026-03-04T14:13:04Z"
+    "title": "Fix processor.save_pretrained missing sub-processor config files",
+    "updated_at": "2026-03-13T12:03:37Z"
   },
   {
-    "additions": 35,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44303 - see also comments here https://github.com/huggingface/transformers/pull/44316#issuecomment-3984362089. Supersedes https://github.com/huggingface/trans\u2026",
-    "changed_files": 1,
+    "additions": 10,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44625 When passing `num_labels` to `AutoConfig.from_pretrained` for Qwen3.5, the value is set on the outer `Qwen3_5Config` but not propagated to `text_config`. This causes `AutoModelForSequenceClassification` to use the d\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44414",
-    "created_at": "2026-03-03T16:47:47Z",
-    "deletions": 39,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44651",
+    "created_at": "2026-03-13T05:35:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44414/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44414",
+    "files_url": "https://github.com/huggingface/transformers/pull/44651/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44651",
     "labels": [],
-    "merged": true,
-    "number": 44414,
+    "merged": false,
+    "number": 44651,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Reduce tqdm verbosity during model loading",
-    "updated_at": "2026-03-03T16:57:56Z"
+    "title": "Fix Qwen3.5 num_labels not propagated to text_config",
+    "updated_at": "2026-03-13T13:39:43Z"
   },
   {
-    "additions": 4,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title.",
-    "changed_files": 1,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44413",
-    "created_at": "2026-03-03T16:24:43Z",
-    "deletions": 4,
+    "additions": 188,
+    "author": "shaealh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Closes #44593 ## Summary - use generation_input_ids/generation_attention_mask when provided for decoder-only models - otherwise infer prompt from leading -100 labels and build left-padded prompt batch - return completion tokens for decoder\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44650",
+    "created_at": "2026-03-13T05:33:59Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44413/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44413",
+    "files_url": "https://github.com/huggingface/transformers/pull/44650/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44650",
     "labels": [],
-    "merged": true,
-    "number": 44413,
+    "merged": false,
+    "number": 44650,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix peft conversion mappings",
-    "updated_at": "2026-03-03T17:08:39Z"
+    "state": "open",
+    "title": "Fix Seq2SeqTrainer generation path for decoder-only models",
+    "updated_at": "2026-04-02T21:23:53Z"
   },
   {
-    "additions": 138,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Extends type checking to `src/transformers/quantizers`",
-    "changed_files": 28,
+    "additions": 12,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44623 In v5.3.0, `ProcessorMixin.save_pretrained()` no longer calls `save_pretrained()` on non-tokenizer components (e.g. `image_processor`, `feature_extractor`). This means their individual config files (e.g. `preprocess\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 25,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44412",
-    "created_at": "2026-03-03T14:53:31Z",
-    "deletions": 74,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44649",
+    "created_at": "2026-03-13T05:22:42Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44412/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44412",
+    "files_url": "https://github.com/huggingface/transformers/pull/44649/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44649",
     "labels": [],
-    "merged": true,
-    "number": 44412,
-    "review_comments_count": 33,
+    "merged": false,
+    "number": 44649,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(typing): Add type checking to `src/transformers/quantizers`",
-    "updated_at": "2026-03-11T11:24:11Z"
+    "title": "fix: ensure processor.save_pretrained saves all component files",
+    "updated_at": "2026-03-13T05:36:54Z"
   },
   {
-    "additions": 59,
-    "author": "burtenshaw",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates `AGENTS.md` to discourage duplicated and trivial work by agents. - CLAUDE.md-> AGENTS.md - ssue-thread coordination before PRs - mandatory duplicate-PR checks with gh commands - no one-off busywork P\u2026",
-    "changed_files": 3,
+    "additions": 0,
+    "author": "gambletan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44625 When `num_labels` or `id2label` are passed to `Qwen3_5Config` (e.g. via `AutoConfig.from_pretrained(model_name, num_labels=1)`), they are only set on the outer composite config but **not forwarded to `text_config`**\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44411",
-    "created_at": "2026-03-03T11:35:13Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44411/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44411",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44648",
+    "created_at": "2026-03-13T05:22:03Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44648/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44648",
     "labels": [],
-    "merged": true,
-    "number": 44411,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44648,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update agentic contributions guidelines in AGENTS.md to force yielding.",
-    "updated_at": "2026-03-12T09:28:45Z"
+    "title": "fix: propagate num_labels to text_config in Qwen3.5",
+    "updated_at": "2026-03-13T05:33:26Z"
   },
   {
-    "additions": 38,
-    "author": "zvik",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR allows the Granite-speech model to use hidden states from the encoder hidden layers. This is an internal model option that is required for the next generation of Granite-speech models. ## Changes: - New conf\u2026",
+    "additions": 9,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@remi-or @ArthurZucker @McPatate pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44647",
+    "created_at": "2026-03-13T04:55:26Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44647/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44647",
+    "labels": [],
+    "merged": false,
+    "number": 44647,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add more generic device support for continuous batching",
+    "updated_at": "2026-03-13T15:43:23Z"
+  },
+  {
+    "additions": 4,
+    "author": "LincolnBurrows2017",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixed typo in Qwen3.5 and Qwen3-VL-MoE model files ```diff - Since Qwen3.5 use timestamps to seperate videos + Since Qwen3.5 use timestamps to separate videos ```",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44408",
-    "created_at": "2026-03-03T07:50:39Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44646",
+    "created_at": "2026-03-13T04:48:06Z",
     "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44408/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44408",
+    "files_url": "https://github.com/huggingface/transformers/pull/44646/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44646",
     "labels": [],
     "merged": false,
-    "number": 44408,
-    "review_comments_count": 5,
+    "number": 44646,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Add option to export  encoder hidden states for Granite-speech",
-    "updated_at": "2026-03-04T10:49:49Z"
+    "title": "Fix typo: seperate -> separate",
+    "updated_at": "2026-03-13T15:27:08Z"
   },
   {
-    "additions": 23,
-    "author": "hongping-zh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Adds an \"Energy Efficiency Considerations\" section to the bitsandbytes quantization documentation, providing practical guidance on the energy implications of different quantization configurations. ## Motivation This addresses th\u2026",
-    "changed_files": 1,
+    "additions": 4,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- Intel XPU: @IlyasMoutawwakil ``` ======================================================================== FAILURES ======================================================================== _________________________________________________\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44407",
-    "created_at": "2026-03-03T04:42:57Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44645",
+    "created_at": "2026-03-13T02:53:31Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44407/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44407",
+    "files_url": "https://github.com/huggingface/transformers/pull/44645/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44645",
     "labels": [],
-    "merged": false,
-    "number": 44407,
+    "merged": true,
+    "number": 44645,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "docs: add energy efficiency considerations to bitsandbytes quantization guide",
-    "updated_at": "2026-03-25T11:53:49Z"
+    "state": "closed",
+    "title": "fix Image.open failure in case \"tests/models/prompt_depth_anything/te\u2026",
+    "updated_at": "2026-03-27T11:11:05Z"
   },
   {
-    "additions": 21,
-    "author": "medhakimbedhief",
+    "additions": 2,
+    "author": "sywangyi",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Adds sequence-classification support for Qwen3.5 in AutoModelForSequenceClassification. **What does this PR do?** This PR enables loading Qwen3.5 checkpoints with `AutoModelForSequenceClassification`, which previously failed with: `ValueEr\u2026",
-    "changed_files": 5,
+    "body_excerpt": "\u2026MXFP4PseudoquantTest::test_quantized_model fail in xpu ## Who can review? - quantization: @SunMarc - Intel XPU: @IlyasMoutawwakil",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44406",
-    "created_at": "2026-03-03T03:44:37Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44644",
+    "created_at": "2026-03-13T02:02:45Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44406/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44406",
+    "files_url": "https://github.com/huggingface/transformers/pull/44644/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44644",
     "labels": [],
     "merged": true,
-    "number": 44406,
-    "review_comments_count": 0,
+    "number": 44644,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Add Qwen3.5 support for sequence classification",
-    "updated_at": "2026-03-04T10:34:22Z"
+    "title": "fix tests/quantization/fp_quant_integration/test_fp_quant.py::FPQuant\u2026",
+    "updated_at": "2026-03-27T14:14:13Z"
   },
   {
-    "additions": 0,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some generate tests have a ~1% chance of generating short outputs because they hit an EOS token early, which causes the test to flake because it asserts the output shape. This PR enforces `min_length` so that doesn't happen!",
-    "changed_files": 3,
+    "additions": 5,
+    "author": "joshuaswanson",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "The generic `labels` docstring in `ModelArgs` says \"masked language modeling loss\" and doesn't mention that causal LM models shift labels internally. This has tripped up a lot of users who pre-shift their labels and end up training next-ne\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44401",
-    "created_at": "2026-03-02T18:26:56Z",
-    "deletions": 77,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44642",
+    "created_at": "2026-03-12T23:47:11Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44401/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44401",
+    "files_url": "https://github.com/huggingface/transformers/pull/44642/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44642",
     "labels": [],
-    "merged": true,
-    "number": 44401,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Enforce min length in some generate tests",
-    "updated_at": "2026-03-04T14:05:50Z"
+    "merged": false,
+    "number": 44642,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Clarify that causal LM labels are shifted internally",
+    "updated_at": "2026-03-13T00:02:30Z"
   },
   {
-    "additions": 14,
-    "author": "Kokonico",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? As per title, confirms interactive terminal before adding formatting to loading_report output. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear\u2026",
+    "additions": 1,
+    "author": "kmbhattt-aws",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Issue: A full 4D attention mask of shape `[1, 1, seq_len, seq_len]` is being created during attention, even when not using alibi for positional embeddings. - This occupied extra memory during training. Root Cause: T\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44400",
-    "created_at": "2026-03-02T18:21:48Z",
-    "deletions": 6,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44400/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44400",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44641",
+    "created_at": "2026-03-12T23:01:11Z",
+    "deletions": 1,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44641/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44641",
     "labels": [],
     "merged": false,
-    "number": 44400,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: interactive terminal checks for formatting in loading_report.py",
-    "updated_at": "2026-03-09T14:46:29Z"
+    "number": 44641,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Conditinally passing and_mask_function arg to create_causal_mask ",
+    "updated_at": "2026-03-13T02:09:22Z"
   },
   {
-    "additions": 3,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44384 In `Qwen3_5TextModel.forward`, after splitting `position_ids` into `text_position_ids` (index 0, for text) and `position_ids` (indices 1:, for temporal/height/width), the decoder layer call incorrectly passed `posit\u2026",
-    "changed_files": 3,
+    "additions": 11,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - Add `\"embedding_colwise\"` tp plan - Add register methods for `ParallelInterface`. Without it, we can register plans with the register method, but not the corresponding `plan_to_weight_dim` and `plan_to_bias_dim`.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44399",
-    "created_at": "2026-03-02T17:28:59Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44640",
+    "created_at": "2026-03-12T20:14:06Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44399/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44399",
+    "files_url": "https://github.com/huggingface/transformers/pull/44640/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44640",
     "labels": [],
     "merged": true,
-    "number": 44399,
+    "number": 44640,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix position_ids typo in Qwen3_5TextModel forward pass",
-    "updated_at": "2026-03-06T01:48:22Z"
+    "title": "Add register method for `ParallelInterface`",
+    "updated_at": "2026-03-13T18:57:48Z"
   },
   {
-    "additions": 4,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Adds the missing ReLU activation in `GlmMoeDsaIndexer.forward()` on per-head q\u00b7k scores before the weighted sum across heads. The reference DeepSeek-V3.2 `fp8_index` kernel applies `T.max(logits, 0)` (i.e., ReLU) a\u2026",
-    "changed_files": 2,
+    "additions": 24099,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary Fixes bugs introduced during the `__init__` \u2192 `@dataclass` conversion in #41250. All are incorrect default values caught by automated comparison of `__init__` signatures (main) vs dataclass fields (PR). | Model | Param | Was | S\u2026",
+    "changed_files": 931,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44398",
-    "created_at": "2026-03-02T16:41:01Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44639",
+    "created_at": "2026-03-12T16:49:54Z",
+    "deletions": 38773,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44398/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44398",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44639/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44639",
+    "labels": [],
     "merged": false,
-    "number": 44398,
+    "number": 44639,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
-    "updated_at": "2026-03-04T13:54:20Z"
+    "title": "Fix incorrect default values in config dataclass migration",
+    "updated_at": "2026-03-12T16:50:10Z"
   },
   {
-    "additions": 86,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #44380 `GPT2Attention.forward()` did not pass the `scaling` parameter to `attention_interface`, causing `scale_attn_weights` and `scale_attn_by_inverse_layer_idx` config options to be silently ignored when usi\u2026",
+    "additions": 19,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/44614. This one is the result of a long debugging session and discussion with @vasqu. The issue is as follow: - Backbone ALWAYS need to c\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44397",
-    "created_at": "2026-03-02T16:14:37Z",
-    "deletions": 50,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44638",
+    "created_at": "2026-03-12T16:19:49Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44397/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44397",
+    "files_url": "https://github.com/huggingface/transformers/pull/44638/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44638",
     "labels": [],
     "merged": true,
-    "number": 44397,
-    "review_comments_count": 17,
+    "number": 44638,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix GPT2 attention scaling ignored in SDPA/FlashAttention",
-    "updated_at": "2026-03-04T16:47:42Z"
+    "title": "Fix output capturing for Backbones",
+    "updated_at": "2026-03-12T17:11:32Z"
   },
   {
-    "additions": 3,
-    "author": "ArthurZucker",
+    "additions": 571,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Some renaming should just never be applied when the weight format already matches. (this is actually regardless of remote code). This allows us to remove 1 test added in https://github.com/huggingface/transformers/c\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? - Adds an `httpx` tracer to gather metrics about network calls - Collect and store metrics and generates an artifact in CI - Can be used locally with `DEBUG_NETWORK` - Activated in CircleCI example of local run: ```\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44396",
-    "created_at": "2026-03-02T15:50:27Z",
-    "deletions": 16,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44636",
+    "created_at": "2026-03-12T15:25:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44396/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44396",
+    "files_url": "https://github.com/huggingface/transformers/pull/44636/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44636",
     "labels": [],
     "merged": true,
-    "number": 44396,
-    "review_comments_count": 2,
+    "number": 44636,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "[ Dynamic weight loader] fix remote code when format matches",
-    "updated_at": "2026-03-03T17:53:39Z"
+    "title": "feat(ci): added a network debug report",
+    "updated_at": "2026-03-18T19:19:03Z"
   },
   {
-    "additions": 153,
+    "additions": 111,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Per a conversion with @Cyrilvallez on Slack on how to make Gemma models more compatible with modular inheritance, this PR: * Updates Gemma models to use `nn.parameter.Buffer` instead of a `self.register_buffer()` fo\u2026",
+    "changed_files": 32,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44635",
+    "created_at": "2026-03-12T14:47:46Z",
+    "deletions": 87,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44635/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44635",
+    "labels": [],
+    "merged": false,
+    "number": 44635,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[Gemma] Modular-friendly buffers",
+    "updated_at": "2026-03-18T10:44:25Z"
+  },
+  {
+    "additions": 30,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title!",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 3,
     "cluster_id": "cluster-44053-8",
     "cluster_ids": [
       "cluster-44053-8"
     ],
     "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44395",
-    "created_at": "2026-03-02T14:52:12Z",
-    "deletions": 31,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44634",
+    "created_at": "2026-03-12T14:04:36Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44395/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44395",
+    "files_url": "https://github.com/huggingface/transformers/pull/44634/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44634",
     "labels": [],
     "merged": true,
-    "number": 44395,
-    "review_comments_count": 22,
+    "number": 44634,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix kernels security issue",
-    "updated_at": "2026-03-03T15:28:20Z"
+    "title": "Fix lfm2 kernel path",
+    "updated_at": "2026-03-12T15:00:59Z"
   },
   {
-    "additions": 4736,
+    "additions": 26,
     "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #43514 precedes this PR",
-    "changed_files": 60,
+    "body_excerpt": "# What does this PR do? As per title \ud83e\udd17",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44394",
-    "created_at": "2026-03-02T14:49:05Z",
-    "deletions": 6717,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44394/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44394",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44633",
+    "created_at": "2026-03-12T13:35:48Z",
+    "deletions": 11,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44633/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44633",
     "labels": [],
-    "merged": false,
-    "number": 44394,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "\ud83d\udea8\ud83d\udea7 FeatureExtractor \u2192 AudioProcessor",
-    "updated_at": "2026-03-23T11:19:52Z"
+    "merged": true,
+    "number": 44633,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[medasr] doc update",
+    "updated_at": "2026-03-16T09:39:50Z"
   },
   {
-    "additions": 61,
-    "author": "ebezzam",
+    "additions": 35,
+    "author": "Abdennacer-Badaoui",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The acoustic tokenizer was updated to use `VoxtralRealtimeConv1dPaddingCache` in #43625 but the ASR model wasn't updated.",
-    "changed_files": 2,
+    "body_excerpt": "Summary - Update test expectations for affected models - Add some needed dependencies - Fix TypeError: `GenerationMixin.prepare_inputs_for_generation()` got multiple values for argument 'next_sequence_length' in Qwen2.5-Omni talker by pass\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44392",
-    "created_at": "2026-03-02T13:41:19Z",
-    "deletions": 100,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44632",
+    "created_at": "2026-03-12T13:32:33Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44392/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44392",
+    "files_url": "https://github.com/huggingface/transformers/pull/44632/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44632",
     "labels": [],
     "merged": true,
-    "number": 44392,
-    "review_comments_count": 0,
+    "number": 44632,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "[VibeVoice ASR] Use updated padding cache for ASR model.",
-    "updated_at": "2026-03-02T13:51:02Z"
+    "title": "[AMD CI] Fix test failures across important models ",
+    "updated_at": "2026-03-17T14:58:10Z"
   },
   {
-    "additions": 3040,
-    "author": "ebezzam",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Tokenizer was updated to Voxtral cache object in #43625, but forgot to update that of the ASR model",
-    "changed_files": 30,
+    "additions": 33,
+    "author": "RyanMullins",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Updates the weights conversion scripts for Gemma to: * Use the new `SentencePieceExtractor` class to get the vocab and merges from the SPM * Always initialize and save the unified `GemmaTokenizer` class ## Before su\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44391",
-    "created_at": "2026-03-02T13:29:59Z",
-    "deletions": 311,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44631",
+    "created_at": "2026-03-12T13:32:25Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44391/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44391",
+    "files_url": "https://github.com/huggingface/transformers/pull/44631/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44631",
     "labels": [],
-    "merged": false,
-    "number": 44391,
+    "merged": true,
+    "number": 44631,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[VibeVoice ASR] Use newer cache object for modular",
-    "updated_at": "2026-03-02T13:34:23Z"
+    "title": "[Gemma] Update conversion scripts for Transformers v5 Comaptibility",
+    "updated_at": "2026-03-18T10:39:53Z"
   },
   {
-    "additions": 3330,
-    "author": "liding-nv",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 13,
+    "additions": 42,
+    "author": "MaybeSam05",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a bug where `num_labels` passed to `AutoConfig.from_pretrained` for Qwen3.5 did not propagate from the top\u2011level `Qwen3_5Config` into the `text_config`, so `AutoModelForSequenceClassification` still saw the de\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44390",
-    "created_at": "2026-03-02T13:22:21Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44630",
+    "created_at": "2026-03-12T13:25:54Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44390/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44390",
-    "labels": [],
-    "merged": true,
-    "number": 44390,
-    "review_comments_count": 20,
+    "files_url": "https://github.com/huggingface/transformers/pull/44630/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44630",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44630,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "add support for nemotron_3",
-    "updated_at": "2026-03-03T18:18:50Z"
+    "title": "Fix Qwen3.5 num_labels propagation to text_config (fix #44625)",
+    "updated_at": "2026-03-12T13:46:07Z"
   },
   {
-    "additions": 5,
-    "author": "Abdennacer-Badaoui",
+    "additions": 15,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "Adds explicit `timm` installation to the AMD ROCm Docker image. This causes ~200 test failures in AMD CI (e.g., [gemma3n vision tests](https://github.com/huggingface/transformers/actions/runs/22474359922/job/65104428291)). This mirrors wha\u2026",
+    "body_excerpt": "# What does this PR do? Fixes \"auto\" dtype when the model is initialized `from_config` It was already fixed for `from_pretrained` in https://github.com/huggingface/transformers/pull/42990 but vLLM creates models with `AutoModel._from_confi\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44389",
-    "created_at": "2026-03-02T13:09:55Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44629",
+    "created_at": "2026-03-12T13:07:55Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44389/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44389",
+    "files_url": "https://github.com/huggingface/transformers/pull/44629/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44629",
     "labels": [],
     "merged": true,
-    "number": 44389,
-    "review_comments_count": 0,
+    "number": 44629,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[AMD CI] Add missing timm dependency to ROCm Docker image",
-    "updated_at": "2026-03-03T12:00:19Z"
+    "title": "Ensure same `dtype` for subconfig when `_from_config`",
+    "updated_at": "2026-03-13T11:35:10Z"
   },
   {
-    "additions": 0,
-    "author": "sahilmaniyar888",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #44336 ### Summary This PR prevents ANSI style escape sequences from being emitted by `loading_report` when stdout is non-interactive (for example, redirected logs/files). ### Changes - Added a small helper `_\u2026",
-    "changed_files": 0,
+    "additions": 37,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? - `encoder_config` and `decoder_config` should return `None` for encoder / decoder config classes themselves. - The encoder / decoder model classes should have the correct config classes associated to them <!-- CURS\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44388",
-    "created_at": "2026-03-02T11:40:49Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44628",
+    "created_at": "2026-03-12T12:24:44Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44388/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44388",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44388,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44628/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44628",
+    "labels": [],
+    "merged": true,
+    "number": 44628,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix loading report ANSI styles for non-TTY output",
-    "updated_at": "2026-03-11T06:29:31Z"
+    "title": "Fix for `VibeVoiceAcousticTokenizer`",
+    "updated_at": "2026-03-12T13:33:02Z"
   },
   {
-    "additions": 62,
-    "author": "eustlb",
+    "additions": 141,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? The diff in revert mapping is needed, otherwise we get failures in a few models, see https://app.circleci.com/pipelines/github/huggingface/transformers/167425/workflows/fa96efe5-f810-408e-bafd-de03b7e881aa/jobs/2208\u2026",
+    "changed_files": 78,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44386",
-    "created_at": "2026-03-02T10:43:13Z",
-    "deletions": 16,
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44627",
+    "created_at": "2026-03-12T12:00:31Z",
+    "deletions": 367,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44386/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44386",
+    "files_url": "https://github.com/huggingface/transformers/pull/44627/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44627",
     "labels": [],
     "merged": true,
-    "number": 44386,
-    "review_comments_count": 2,
+    "number": 44627,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "[higgs-audio-v2] fix sampling",
-    "updated_at": "2026-03-02T13:06:23Z"
+    "title": "Move VLM conversions to the main mapping",
+    "updated_at": "2026-03-17T10:13:03Z"
   },
   {
-    "additions": 8,
-    "author": "NielsRogge",
+    "additions": 11,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I wasn't able to run make check-repo locally successfully, unless the following 3 fixes were applied.",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Adds a missing branch. I don't really know if this is worth it, can't find a model online that enforces the flag to `True`",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44385",
-    "created_at": "2026-03-02T09:45:15Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44626",
+    "created_at": "2026-03-12T11:23:21Z",
     "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44385/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44385",
+    "files_url": "https://github.com/huggingface/transformers/pull/44626/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44626",
     "labels": [],
     "merged": false,
-    "number": 44385,
-    "review_comments_count": 0,
+    "number": 44626,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "Fix make check-repo",
-    "updated_at": "2026-03-02T09:54:23Z"
+    "title": "don't break legacy behavior when enforced!",
+    "updated_at": "2026-03-12T11:32:46Z"
   },
   {
-    "additions": 4,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix the attr `_no_split_modules` of `Qwen3_5Model` and `Qwen3_5MoeModel`, which affect the FSDP init of hf Trainer.",
-    "changed_files": 3,
+    "additions": 34,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Follow-up of #44549",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44382",
-    "created_at": "2026-03-02T05:42:48Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44624",
+    "created_at": "2026-03-12T09:26:17Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44382/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44382",
+    "files_url": "https://github.com/huggingface/transformers/pull/44624/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44624",
     "labels": [],
     "merged": true,
-    "number": 44382,
+    "number": 44624,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Bugfix] fix qwen3.5 no split module",
-    "updated_at": "2026-03-02T16:17:22Z"
+    "title": "Fix more wrong HF hub checkpoint names",
+    "updated_at": "2026-03-12T09:59:12Z"
   },
   {
-    "additions": 2,
-    "author": "carcel-yu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? This PR adds MLU support to `is_torch_bf16_gpu_available()` by checking `torch.mlu.is_bf16_supported()` when an MLU device is available. ### Why is this needed? MLU devices support bf16 training, but they are curr\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "LysandreJik",
+    "author_association": "MEMBER",
+    "body_excerpt": "CB temporarily disabled on non-cuda devices as it's currently using cuda streams by default.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44381",
-    "created_at": "2026-03-02T05:34:49Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44622",
+    "created_at": "2026-03-12T08:11:10Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44381/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44381",
+    "files_url": "https://github.com/huggingface/transformers/pull/44622/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44622",
     "labels": [],
-    "merged": true,
-    "number": 44381,
+    "merged": false,
+    "number": 44622,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add MLU bf16 support to is_torch_bf16_gpu_available",
-    "updated_at": "2026-03-06T14:34:30Z"
+    "title": "CB temporarily disabled on non-cuda devices",
+    "updated_at": "2026-03-16T00:00:20Z"
   },
   {
-    "additions": 579,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary ! This PR is in draft, waiting for https://github.com/huggingface/transformers/pull/44227 to be merged This PR adds support for the `flash_attention_with_kvcache` kernel in continuoys batching. This is very efficient for decode-o\u2026",
-    "changed_files": 14,
+    "additions": 0,
+    "author": "KoichiYasuoka",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43170 for `modelcard` removal Quick reproduce: ``` from transformers import pipeline fmp=pipeline(\"fill-mask\",\"google-bert/bert-base-cased\") fmp.save_pretrained(\"tmpdir\") ``` ## Before submitting - [ ] This P\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44379",
-    "created_at": "2026-03-01T23:13:17Z",
-    "deletions": 235,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44621",
+    "created_at": "2026-03-12T08:04:29Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44379/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44379",
+    "files_url": "https://github.com/huggingface/transformers/pull/44621/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44621",
     "labels": [],
     "merged": true,
-    "number": 44379,
-    "review_comments_count": 19,
+    "number": 44621,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Add paged_attention kernel",
-    "updated_at": "2026-03-09T22:16:31Z"
+    "title": "pipelines do not have modelcard",
+    "updated_at": "2026-03-13T14:28:48Z"
   },
   {
-    "additions": 1,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 15,
+    "author": "LysandreJik",
+    "author_association": "MEMBER",
+    "body_excerpt": "FastAPI doesn't play well with `from __future__ import annotations`. This PR reverts this change and correctly guards against unprotected optional imports. Reverts https://github.com/huggingface/transformers/pull/44256",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44378",
-    "created_at": "2026-03-01T22:57:50Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44620",
+    "created_at": "2026-03-12T07:56:55Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44378/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44378",
+    "files_url": "https://github.com/huggingface/transformers/pull/44620/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44620",
     "labels": [],
-    "merged": false,
-    "number": 44378,
+    "merged": true,
+    "number": 44620,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix logging with each layer with ms-swift lora fine-tuning ",
-    "updated_at": "2026-03-02T14:18:22Z"
+    "title": "Fix transformers serve's 422 unprocessable entity",
+    "updated_at": "2026-03-16T13:41:44Z"
   },
   {
-    "additions": 175,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes TODO: Implement proper TP support for compressed tensors quantization <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release not\u2026",
-    "changed_files": 1,
+    "additions": 43,
+    "author": "yunhaoli24",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Set `add_bos_token=True` and `add_eos_token=True` by default in `DebertaV2Tokenizer` to fix the regression where `add_special_tokens=True` doesn't add BOS/EOS tokens for `microsoft/mdeberta-v3-base` tokenizer in transformers >=5\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44377",
-    "created_at": "2026-03-01T22:46:07Z",
-    "deletions": 10,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44618",
+    "created_at": "2026-03-12T04:46:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44377/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44377",
+    "files_url": "https://github.com/huggingface/transformers/pull/44618/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44618",
     "labels": [],
     "merged": false,
-    "number": 44377,
+    "number": 44618,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Implement Tensor Parallelism (TP) support for compressed tensors quantization",
-    "updated_at": "2026-03-02T14:15:47Z"
+    "title": "fix: Add BOS/EOS tokens by default for DeBERTa v2 tokenizer",
+    "updated_at": "2026-03-16T05:28:25Z"
   },
   {
-    "additions": 4,
-    "author": "stuckvgn",
+    "additions": 7,
+    "author": "s-zx",
     "author_association": "NONE",
-    "body_excerpt": "## Summary The quick-start code examples in `flan-t5.md` and `flan-ul2.md` use `\"A step by step recipe to make bolognese pasta:\"` as the demo prompt, with output that includes `ground beef`. This PR replaces it with `\"A step by step recipe\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## Summary Add fallback to bfloat16 when Float8 dtype fails to set, preventing TypeError when loading FP8 models on PyTorch builds without Float8_e4m3fnStorage support. ## Root Cause `torch.set_default_dtype(dtype)` raises `TypeError: coul\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44376",
-    "created_at": "2026-03-01T17:41:48Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44616",
+    "created_at": "2026-03-11T23:00:15Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44376/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44376",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44616/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44616",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44376,
+    "number": 44616,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: update Flan-T5 and Flan-UL2 example to use plant-based recipe prompt",
-    "updated_at": "2026-03-14T06:47:36Z"
+    "title": "fix: add Float8 dtype fallback in modeling_utils.py",
+    "updated_at": "2026-03-18T16:02:54Z"
   },
   {
-    "additions": 6829,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds RF-DETR using Codex 5.3. It did everything: modular (in 600 lines of code), fast and slow image processors, conversion script with bells and whistles (setting `id2label` etc.) To do: - [x] verify loss c\u2026",
-    "changed_files": 21,
+    "additions": 35,
+    "author": "MaybeSam05",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Restores `is_torch_fx_available` in `transformers.utils.import_utils` as a backwards-compatibility shim so that Hub models loaded with `trust_remote_code=True` that still import this symbol no longer raise `ImportEr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44375",
-    "created_at": "2026-03-01T17:32:17Z",
-    "deletions": 4,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44615",
+    "created_at": "2026-03-11T22:52:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44375/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44375",
+    "files_url": "https://github.com/huggingface/transformers/pull/44615/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44615",
     "labels": [],
     "merged": false,
-    "number": 44375,
+    "number": 44615,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add RF-DETR",
-    "updated_at": "2026-03-05T16:00:53Z"
+    "title": "Restore is_torch_fx_available for trust_remote_code backwards compatibility",
+    "updated_at": "2026-03-12T10:33:43Z"
   },
   {
-    "additions": 4,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #44373 The `position_ids` parameter docstrings in `_get_unpad_data()` (line 360) and `_upad_input()` (line 413) in `src/transformers/modeling_flash_attention_utils.py` were incorrectly describing `attention_m\u2026",
-    "changed_files": 1,
+    "additions": 19,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Only detected in #43590, so it can only be detected there for `test_sdpa_can_compile_dynamic` (`lw_detr`). Core issue: Dynamo can cache the attribute and ignore it across frames which means that updates/reads are not working as expected. T\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44374",
-    "created_at": "2026-03-01T17:07:35Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44614",
+    "created_at": "2026-03-11T20:49:51Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44374/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44374",
+    "files_url": "https://github.com/huggingface/transformers/pull/44614/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44614",
     "labels": [],
     "merged": false,
-    "number": 44374,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix incorrect position_ids docstring in modeling_flash_attention_utils.py",
-    "updated_at": "2026-03-02T14:06:30Z"
-  },
-  {
-    "additions": 5,
-    "author": "N3u0ns",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44355 `inspect.getsource()` fails with `TypeError` when called on Cython-compiled functions or built-in functions that don't have Python source code. This adds a try/except block to gracefully handle this case by returnin\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44372",
-    "created_at": "2026-03-01T13:53:58Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44372/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44372",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44372,
-    "review_comments_count": 0,
+    "number": 44614,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "fix: handle Cython-compiled functions in get_docstring_indentation_level",
-    "updated_at": "2026-03-02T13:39:50Z"
+    "title": "[`Compile`] Fix capture outputs during compile",
+    "updated_at": "2026-03-13T02:15:21Z"
   },
   {
-    "additions": 12,
-    "author": "leaderofARS",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Fix documentation inconsistencies in integrations folder ## Description This PR addresses documentation errors and inconsistencies across the integrations module, specifically clarifying terminology and deprecation status in two key inte\u2026",
+    "additions": 105,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "adds docs for tensor parallelism for training",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44369",
-    "created_at": "2026-03-01T07:34:43Z",
-    "deletions": 9,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44613",
+    "created_at": "2026-03-11T20:43:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44369/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44369",
+    "files_url": "https://github.com/huggingface/transformers/pull/44613/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44613",
     "labels": [],
     "merged": false,
-    "number": 44369,
-    "review_comments_count": 7,
+    "number": 44613,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Feature/integrations docs fix",
-    "updated_at": "2026-03-06T19:47:39Z"
+    "title": "[docs] tp training",
+    "updated_at": "2026-03-11T21:08:32Z"
   },
   {
-    "additions": 171,
-    "author": "jayakumarpujar",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #43701 Models with `_checkpoint_conversion_mapping` (e.g. VLMs like Qwen2.5VL, LLaVA, ColPali, etc.) use a key renaming system: - **Loading** (`from_pretrained`): Checkpoint keys are renamed from original format \u2192 model fo\u2026",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The output function hook in `EmbeddingParallel` casts the mask to fp32. It breaks things for neuron devices. Suggested fix: cast to the outputs' dtype.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44366",
-    "created_at": "2026-03-01T03:43:16Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44612",
+    "created_at": "2026-03-11T20:09:41Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44366/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44366",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44366,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44612/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44612",
+    "labels": [],
+    "merged": true,
+    "number": 44612,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix resume_from_checkpoint key mismatch for models with _checkpoint_conversion_mapping",
-    "updated_at": "2026-03-02T14:02:22Z"
+    "title": "fix: cast to proper dtype in EmbeddingParallel",
+    "updated_at": "2026-03-12T21:08:04Z"
   },
   {
-    "additions": 8,
-    "author": "jayakumarpujar",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44360 - The reference DeepSeek-V3.2 `fp8_index` kernel applies **ReLU** to per-head q\u00b7k scores before weighting and summing across heads: ``` logits[i3_n, i_h] = T.max(logits[i3_n, i_h], 0) * q_s_frag[i_h] ``` [Referenc\u2026",
+    "additions": 15,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 [This PR (\ud83d\udea8 Delete duplicate code in backbone utils)](https://github.com/huggingface/transformers/pull/43323) structured config loading to use [BackboneMi\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44364",
-    "created_at": "2026-03-01T02:19:14Z",
-    "deletions": 4,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44611",
+    "created_at": "2026-03-11T20:02:14Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44364/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44364",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44364,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44611/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44611",
+    "labels": [],
+    "merged": true,
+    "number": 44611,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
-    "updated_at": "2026-03-02T13:55:48Z"
+    "title": "fix(models): Forward timm model kwargs to timm.create_model for OmDet-Turbo",
+    "updated_at": "2026-03-13T11:57:20Z"
   },
   {
-    "additions": 57,
-    "author": "jayakumarpujar",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Fixes #44327 - `decode_spans()` in the QA pipeline crashes with `ValueError: kth(=N) out of bounds (N)` when `len(scores_flat) == topk` (e.g., `top_k=100` with `seq_len=10`, since `10\u00b2 = 100`) - Root cause: `np.argpartition(ar\u2026",
-    "changed_files": 2,
+    "additions": 0,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? By default, `initialize_tensor_parallelism` hides stdout and stderr for ranks > 0. While convenient, this is not perfect for dev and debugging. I suggest we simply add a flag to be able to disable this feature if wa\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44363",
-    "created_at": "2026-03-01T01:47:44Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44608",
+    "created_at": "2026-03-11T18:57:01Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44363/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44363",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44363,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44608/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44608",
+    "labels": [],
+    "merged": true,
+    "number": 44608,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix off-by-one in decode_spans causing ValueError with np.argpartition",
-    "updated_at": "2026-03-02T13:03:02Z"
+    "title": "Allow to disable stdout hiding for TP",
+    "updated_at": "2026-03-12T19:36:06Z"
   },
   {
-    "additions": 4,
-    "author": "harshaljanjani",
+    "additions": 9,
+    "author": "gabe-l-hart",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 [MIGRATION_GUIDE_V5.md](https://github.com/harshaljanjani/transformers/blob/main/MIGRATION_GUIDE_V5.md) states that v5 renamed `additional_\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR fixes generation for models using the `Idefics3ForConditionalGeneration` architecture with `use_cache=False`. ## Testing <details> <summary>docling_repro.py</summary> ```py import os import torch import time\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44362",
-    "created_at": "2026-02-28T20:04:05Z",
-    "deletions": 6,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44607",
+    "created_at": "2026-03-11T18:41:58Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44362/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44362",
+    "files_url": "https://github.com/huggingface/transformers/pull/44607/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44607",
     "labels": [],
     "merged": true,
-    "number": 44362,
+    "number": 44607,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(tokenizer): Fix MLukeTokenizer AttributeError post-v5 refactor",
-    "updated_at": "2026-03-02T14:51:18Z"
+    "title": "Idefics3 without cache fix",
+    "updated_at": "2026-03-16T15:23:34Z"
   },
   {
-    "additions": 341,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds a workaround for the PyTorch MPS `sdpa_vector_2pass_mps` correctness bug ([pytorch/pytorch#174861](https://github.com/pytorch/pytorch/issues/174861)). **The problem:** On Apple Silicon with MPS backend, `F.scal\u2026",
-    "changed_files": 2,
+    "additions": 26,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "In v5, we enforce creating a model-specific tokenizer (ex. LlamaTokenizer, Qwen2Tokenizer, et .) object when specified. 1. For instance, when `tokenizer_class` is set in `tokenization_config.json` 2. Or when using the auto_mapped `tokenize\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44359",
-    "created_at": "2026-02-28T17:47:01Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44606",
+    "created_at": "2026-03-11T17:29:12Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44359/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44359",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44606/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44606",
+    "labels": [],
     "merged": false,
-    "number": 44359,
+    "number": 44606,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(sdpa): add workaround for MPS sdpa_vector_2pass_mps correctness bug",
-    "updated_at": "2026-03-02T13:54:58Z"
+    "state": "open",
+    "title": "optionally override tokenizer class with serialized tokenizer ",
+    "updated_at": "2026-03-17T16:03:19Z"
   },
   {
-    "additions": 6,
-    "author": "hardikmeisheri",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary - `ShieldGemma2ForImageClassification` was missing `_tied_weights_keys`, so `model.lm_head.weight` was randomly re-initialized on every `from_pretrained` call instead of being tied to `embed_tokens.weight`. - This caused non-det\u2026",
-    "changed_files": 1,
+    "additions": 540,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "* refactors DeepSpeed ZeRO doc: - moves the troubleshooting section to the more general Debugging guide to keep everything in one place - moves the sequence parallelism section into a new doc to give it more visibility - update to be more\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44358",
-    "created_at": "2026-02-28T16:49:27Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44605",
+    "created_at": "2026-03-11T17:26:12Z",
+    "deletions": 1176,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44358/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44358",
+    "files_url": "https://github.com/huggingface/transformers/pull/44605/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44605",
     "labels": [],
-    "merged": true,
-    "number": 44358,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix ShieldGemma2 non-reproducible outputs by adding _tied_weights_keys",
-    "updated_at": "2026-03-16T20:02:09Z"
+    "merged": false,
+    "number": 44605,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[docs] zero + sequence parallelism",
+    "updated_at": "2026-03-16T20:31:23Z"
   },
   {
-    "additions": 482,
-    "author": "NabilMch",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 127,
+    "additions": 415,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fix the quantization CI : - [x] autoround - [x] bnb - [x] fp_quant_integration - [x] metal - [x] mxfp4 - [x] quark_integration - [x] torchao_integration One major point in this PR is that I bump the min vers\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44357",
-    "created_at": "2026-02-28T15:11:37Z",
-    "deletions": 489,
+    "comments_count": 33,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44604",
+    "created_at": "2026-03-11T16:44:38Z",
+    "deletions": 912,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44357/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44357",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44357,
+    "files_url": "https://github.com/huggingface/transformers/pull/44604/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44604",
+    "labels": [],
+    "merged": true,
+    "number": 44604,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix RoPE inv_freq default initialization (Issue #39753)",
-    "updated_at": "2026-03-02T13:50:00Z"
+    "title": "Bump torchao >=0.15 and fix quantization CI",
+    "updated_at": "2026-03-16T16:07:12Z"
   },
   {
-    "additions": 6,
-    "author": "iamaber",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Checks if model is already in target dtype before casting to avoid redundant copies that cause 25% performance degradation with `--fp16_full_eval`. ## Changes - Added dtype check before casting model to fp16/bf16 in `evaluation_\u2026",
+    "additions": 17,
+    "author": "michalrzak",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? A quick fix that updates the Dockerfile to run on `arm64` systems (such as the NVIDIA Spark). The previous version of the Dockerfile fails on `arm64` systems due to `SudachiPy`, which only provides wheels for `x86_6\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44356",
-    "created_at": "2026-02-28T14:24:32Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44603",
+    "created_at": "2026-03-11T16:42:30Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44356/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44356",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44603/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44603",
+    "labels": [],
     "merged": false,
-    "number": 44356,
+    "number": 44603,
     "review_comments_count": 0,
+    "state": "open",
+    "title": "fixed dockerfile for arm64 systems",
+    "updated_at": "2026-03-11T16:42:30Z"
+  },
+  {
+    "additions": 218,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Follow up of https://github.com/huggingface/transformers/pull/44330 Also take the opportunity to simplify t5 and its children, because the way they compute`position_bias` was super convoluted/overc\u2026",
+    "changed_files": 61,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44602",
+    "created_at": "2026-03-11T16:19:43Z",
+    "deletions": 1083,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44602/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44602",
+    "labels": [],
+    "merged": true,
+    "number": 44602,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "fix: avoid redundant fp16/bf16 model casts in evaluation_loop",
-    "updated_at": "2026-03-02T13:59:38Z"
+    "title": "Remove `cache_position` in more models (2)",
+    "updated_at": "2026-03-12T22:38:15Z"
   },
   {
-    "additions": 73,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes an off-by-one error in `decode_spans()` where `np.argpartition` is called with `kth == len(arr)` when `topk` equals the number of candidate scores. This raises `ValueError: kth(=N) out of bounds (N)`. **Root\u2026",
-    "changed_files": 2,
+    "additions": 510,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Goal is to be able to run a model with both PP and TP.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44354",
-    "created_at": "2026-02-28T08:46:39Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44354/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44354",
-    "labels": [
-      "Code agent slop"
-    ],
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44601",
+    "created_at": "2026-03-11T15:56:51Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44601/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44601",
+    "labels": [],
     "merged": false,
-    "number": 44354,
+    "number": 44601,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: off-by-one in decode_spans causes ValueError when topk == len(scores)",
-    "updated_at": "2026-03-02T13:02:38Z"
+    "state": "open",
+    "title": "[Distributed] Add PP support natively",
+    "updated_at": "2026-03-12T11:53:24Z"
   },
   {
-    "additions": 50,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil , pls help review, thx!",
+    "additions": 0,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Small mistake in https://github.com/huggingface/transformers/pull/44432. cc @zucchini-nlp, was it intended to remove the scaling? (I assume so since the embedding now has the saling baked in, and I guess paligemma a\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44353",
-    "created_at": "2026-02-28T07:50:37Z",
-    "deletions": 2,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44600",
+    "created_at": "2026-03-11T15:40:16Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44353/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44353",
+    "files_url": "https://github.com/huggingface/transformers/pull/44600/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44600",
     "labels": [],
     "merged": true,
-    "number": 44353,
-    "review_comments_count": 1,
+    "number": 44600,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "add expectations for xpu for olmo_hybrid model",
-    "updated_at": "2026-03-02T10:15:09Z"
+    "title": "Remove useless identity assignment",
+    "updated_at": "2026-03-12T10:21:23Z"
   },
   {
-    "additions": 13,
-    "author": "giulio-leone",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module emitted **bold/italic ANSI escape codes** even when `stdout` was not connected to a terminal (e.g. piped or redirected output). While `_color()` already gated color codes behind `sys.stdo\u2026",
+    "additions": 3,
+    "author": "hf-security-analysis[bot]",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Update `.github/workflows/self-comment-ci.yml` workflow configuration. cc @vasqu Closes huggingface/tracking-issues#17",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44352",
-    "created_at": "2026-02-28T06:22:19Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44599",
+    "created_at": "2026-03-11T15:39:30Z",
+    "deletions": 161,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44352/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44352",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44599/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44599",
+    "labels": [],
     "merged": false,
-    "number": 44352,
+    "number": 44599,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: suppress ANSI escape codes when stdout is not a terminal",
-    "updated_at": "2026-03-02T13:59:15Z"
+    "title": "chore: update self-comment-ci.yml",
+    "updated_at": "2026-04-02T09:05:25Z"
   },
   {
-    "additions": 7,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
-    "changed_files": 2,
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, updated to remove former members as well cc @yonigozlan @ydshieh",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44350",
-    "created_at": "2026-02-28T03:20:47Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44598",
+    "created_at": "2026-03-11T15:13:25Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44350/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44350",
+    "files_url": "https://github.com/huggingface/transformers/pull/44598/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44598",
     "labels": [],
     "merged": true,
-    "number": 44350,
-    "review_comments_count": 6,
+    "number": 44598,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "skip 1 invalid test case for higgs_audio_v2",
-    "updated_at": "2026-03-05T11:08:09Z"
+    "title": "Add Yoni to run-slow workflow",
+    "updated_at": "2026-03-11T15:38:10Z"
   },
   {
-    "additions": 49,
-    "author": "zzc0430",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? > Inspired by https://github.com/huggingface/transformers/pull/44347#issuecomment-3976028358 Fixes `transformers serve` failing with hybrid models like Qwen3.5 that use `linear_attention` layers. Two issues are addr\u2026",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Our beautiful Dashboard is missing ..... damm",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44349",
-    "created_at": "2026-02-28T03:09:30Z",
-    "deletions": 3,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44349/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44349",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44597",
+    "created_at": "2026-03-11T13:53:02Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44597/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44597",
     "labels": [],
-    "merged": false,
-    "number": 44349,
+    "merged": true,
+    "number": 44597,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: support linear_attention in continuous batching and fix serve ch\u2026",
-    "updated_at": "2026-03-02T13:48:04Z"
+    "title": "Fix CircleCI summary report not showing due to missing dependency",
+    "updated_at": "2026-03-20T07:33:38Z"
   },
   {
-    "additions": 341,
-    "author": "n0kovo",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Most quantized models for Apple Silicon on the Hub are in MLX format. The `MetalConfig` quantization backend supports on-the-fly quantization of standard checkpoints but cannot load pre-quantized MLX models. This PR fixes the fi\u2026",
-    "changed_files": 4,
+    "additions": 26,
+    "author": "Desel72",
+    "author_association": "NONE",
+    "body_excerpt": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype # What does this PR do? When loading FP8 models (e.g. `Qwen/Qwen3.5-35B-A3B-FP8`) with `dtype=\"auto\"`, the auto-detected dtype from checkpoint weight\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44348",
-    "created_at": "2026-02-28T00:24:32Z",
-    "deletions": 32,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44596",
+    "created_at": "2026-03-11T13:03:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44348/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44348",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44596/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44596",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44348,
+    "number": 44596,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Enable MetalConfig to load pre-quantized MLX models from HuggingFace Hub",
-    "updated_at": "2026-03-02T17:18:46Z"
+    "state": "closed",
+    "title": "Fix TypeError when loading float8 models by falling back to bfloat16 in local_torch_dtype",
+    "updated_at": "2026-03-11T14:00:39Z"
   },
   {
-    "additions": 49,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes `AttributeError` when using continuous batching with composite model configs (e.g. `Qwen3_5Config` for vision-language models). Composite configs store attributes like `num_attention_heads` and `num_key_value\u2026",
-    "changed_files": 2,
+    "additions": 2324,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add [CHMv2 ](https://arxiv.org/abs/2603.06382) to Transformers",
+    "changed_files": 23,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44347",
-    "created_at": "2026-02-27T22:48:49Z",
-    "deletions": 8,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44595",
+    "created_at": "2026-03-11T12:38:44Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44347/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44347",
+    "files_url": "https://github.com/huggingface/transformers/pull/44595/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44595",
     "labels": [
-      "Code agent slop"
+      "New model",
+      "run-slow"
     ],
-    "merged": false,
-    "number": 44347,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44595,
+    "review_comments_count": 30,
     "state": "closed",
-    "title": "fix: resolve composite config in PagedAttentionCache and group_layers_by_attn_type",
-    "updated_at": "2026-03-02T13:41:23Z"
+    "title": "Add CHMv2",
+    "updated_at": "2026-03-11T16:00:03Z"
   },
   {
-    "additions": 4,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes the `q_a_layernorm` and `kv_a_layernorm` in DeepSeek V2/V3 MLA attention to explicitly receive `config.rms_norm_eps` instead of falling back to the RMSNorm class default (`1e-6`). **The problem:** All other RM\u2026",
+    "additions": 271,
+    "author": "vimal-crypto",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What this PR does This PR brings `ObjectDetectionPipeline` in line with its sister pipelines (`ZeroShotObjectDetectionPipeline`, `ImageClassificationPipeline`) by adding four enhancements to the postprocessing stage. ### Changes **1. Sc\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44346",
-    "created_at": "2026-02-27T21:47:45Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44594",
+    "created_at": "2026-03-11T12:37:46Z",
+    "deletions": 40,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44346/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44346",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44594/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44594",
+    "labels": [],
     "merged": false,
-    "number": 44346,
+    "number": 44594,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(deepseek): pass config.rms_norm_eps to MLA q/kv layernorms",
-    "updated_at": "2026-03-02T13:26:21Z"
+    "state": "open",
+    "title": "[Pipeline] Add top_k, label filtering, box_format and score sorting to ObjectDetectionPipeline",
+    "updated_at": "2026-03-11T12:37:46Z"
   },
   {
-    "additions": 13,
-    "author": "manavshrivastavagit",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44303 When redirecting `from_pretrained` output to a log file (e.g. in CI), the \"Loading weights\" tqdm bar was updating its postfix with `Materializing param=...` on every parameter, producing huge log files. ## Change -\u2026",
-    "changed_files": 2,
+    "additions": 15,
+    "author": "BenjaminBossan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Multiple PEFT tests are failing due to recent changes in transformers. - hf_device_map attribute may not exist in some cases - respect inference_mode in load_adapter - new model loading requires changes for bnb (SCB\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44345",
-    "created_at": "2026-02-27T21:05:22Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44592",
+    "created_at": "2026-03-11T10:41:51Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44345/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44345",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44345,
+    "files_url": "https://github.com/huggingface/transformers/pull/44592/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44592",
+    "labels": [],
+    "merged": true,
+    "number": 44592,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Less verbose weight-loading tqdm when stdout is not a TTY (fixes #44303)",
-    "updated_at": "2026-03-02T13:49:11Z"
+    "title": "FIX Multiple PEFT errors after v5 transition",
+    "updated_at": "2026-03-11T12:24:05Z"
   },
   {
-    "additions": 6,
-    "author": "manavshrivastavagit",
+    "additions": 60,
+    "author": "moktamd",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44297 Qwen3.5 models on the Hub (e.g. [Qwen/Qwen3.5-27B](https://huggingface.co/Qwen/Qwen3.5-27B)) use `\"tokenizer_class\": \"Qwen2Tokenizer\"` in `tokenizer_config.json`, but `TOKENIZER_MAPPING_NAMES` had `qwen3_5` \u2192 `\"Qwen\u2026",
+    "body_excerpt": "Adds `_apply_mps_fixes` in `sdpa_attention.py` to handle two upstream PyTorch MPS bugs: 1. **pytorch/pytorch#176767** (fixed in PyTorch 2.12): pads value tensor when `v_head_dim != q_head_dim` to avoid corrupted output. Affects DeepSeek mo\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44344",
-    "created_at": "2026-02-27T21:04:27Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44591",
+    "created_at": "2026-03-11T10:32:26Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44344/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44344",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44591/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44591",
+    "labels": [],
     "merged": false,
-    "number": 44344,
+    "number": 44591,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tokenizer_class in tokenizer_config.json for Qwen3.5 save_pretrained (fixes #44297)",
-    "updated_at": "2026-03-02T13:17:41Z"
+    "title": "Add MPS SDPA workarounds for value head dim and bidirectional attention",
+    "updated_at": "2026-03-11T13:37:15Z"
   },
   {
-    "additions": 16,
-    "author": "manavshrivastavagit",
+    "additions": 2,
+    "author": "pranay-3108",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44336 `utils/loading_report.py` was emitting ANSI codes for **bold** and *italic* via `PALETTE['bold']` and `PALETTE['italic']` without checking if stdout is connected to a terminal. `_color()` already respects `sys.stdou\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes incorrect documentation for `position_ids` in `masking_utils.py`. The docstring previously described `position_ids` as `torch.Tensor`. This PR updates it to `torch.LongTensor` and aligns the description with the standard wording used\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44343",
-    "created_at": "2026-02-27T20:58:33Z",
-    "deletions": 9,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44590",
+    "created_at": "2026-03-11T05:13:57Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44343/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44343",
+    "files_url": "https://github.com/huggingface/transformers/pull/44590/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44590",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44343,
+    "number": 44590,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ANSI codes in loading_report when stdout is not a TTY (fixes #44336)",
-    "updated_at": "2026-03-02T13:44:43Z"
+    "title": "Fix incorrect docstring for position_ids",
+    "updated_at": "2026-03-11T21:08:42Z"
   },
   {
-    "additions": 383,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "- created a new performance section divided into memory and speed optimizations - model memory training anatomy [guide](https://huggingface.co/docs/transformers/main/en/model_memory_anatomy) is now the more descriptive and simplified GPU m\u2026",
-    "changed_files": 9,
+    "additions": 1,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes Llama4 model loading under BitsAndBytes (BNB) quantization mode. Router quantized incorrectly causes shape mismatch: Llama4Router inherits from nn.Linear, so BNB quantizes its weight into a packed format. However, super().forward() c\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44342",
-    "created_at": "2026-02-27T20:10:49Z",
-    "deletions": 273,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44588",
+    "created_at": "2026-03-11T01:42:33Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44342/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44342",
+    "files_url": "https://github.com/huggingface/transformers/pull/44588/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44588",
     "labels": [],
-    "merged": false,
-    "number": 44342,
-    "review_comments_count": 12,
-    "state": "open",
-    "title": "[docs] training performance",
-    "updated_at": "2026-03-16T20:24:33Z"
+    "merged": true,
+    "number": 44588,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Fix llama4 bnb mode",
+    "updated_at": "2026-03-27T14:19:14Z"
   },
   {
-    "additions": 12,
-    "author": "Kokonico",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44336 ## Changes * Added a new `_palette` function to return the ANSI code for a given color or format only if `sys.stdout` is interactive. (`src/transformers/utils/loading_report.py`) * Updated all usages of `PALETTE[<format>]` in\u2026",
-    "changed_files": 1,
+    "additions": 32,
+    "author": "kmbhattt-aws",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44341",
-    "created_at": "2026-02-27T19:30:30Z",
-    "deletions": 10,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44341/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44341",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44341,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix and optimize ANSI color handling in loading report for interactive terminals",
-    "updated_at": "2026-03-02T18:16:00Z"
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44587",
+    "created_at": "2026-03-11T01:01:18Z",
+    "deletions": 20,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44587/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44587",
+    "labels": [],
+    "merged": false,
+    "number": 44587,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix: Handling fused qkv result tensor slicing for tp sharded qkv weights",
+    "updated_at": "2026-03-12T21:31:29Z"
   },
   {
-    "additions": 33,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some speculative tests seem flaky with SDPA but reliable with `eager` attention. In local testing, `test_speculative_decoding_equals_regular_decoding` fails 5-10% of the time without this change. and I also saw CI failures. Failures are re\u2026",
-    "changed_files": 1,
+    "additions": 91,
+    "author": "mvanhorn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Decouples router logits collection from output visibility in Mixtral's `ForCausalLM`. Previously, `output_router_logits=False` (the default) prevented `aux_loss` from being computed, meaning load balancing was silen\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44340",
-    "created_at": "2026-02-27T18:09:09Z",
-    "deletions": 27,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44586",
+    "created_at": "2026-03-11T00:24:07Z",
+    "deletions": 39,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44340/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44340",
+    "files_url": "https://github.com/huggingface/transformers/pull/44586/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44586",
     "labels": [],
-    "merged": true,
-    "number": 44340,
-    "review_comments_count": 6,
+    "merged": false,
+    "number": 44586,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix speculative tests that are flaky with SDPA",
-    "updated_at": "2026-03-02T17:18:27Z"
+    "title": "Fix Mixtral aux_loss not computed when output_router_logits=False",
+    "updated_at": "2026-03-11T14:31:21Z"
   },
   {
-    "additions": 6109,
-    "author": "harshaljanjani",
+    "additions": 10,
+    "author": "mvanhorn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 This PR adds **DEIMv2** to Transformers! \u2192 **IMP:** I've linked two notebooks: a [Colab notebook here](https://colab.research.google.com/drive/1jCNefxrKiHWdBEIYTcU3jsd9xyWDwIxC?usp=sharing) demonstrating the fun\u2026",
-    "changed_files": 16,
+    "body_excerpt": "# What does this PR do? Passes `eps=config.rms_norm_eps` to both `q_a_layernorm` and `kv_a_layernorm` in the DeepseekV3 MLA attention module. Without this, these layernorms default to `eps=1e-5` instead of the config value (`1e-6`), causin\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44339",
-    "created_at": "2026-02-27T18:08:53Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44585",
+    "created_at": "2026-03-11T00:20:54Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44339/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44339",
+    "files_url": "https://github.com/huggingface/transformers/pull/44585/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44585",
     "labels": [],
     "merged": false,
-    "number": 44339,
-    "review_comments_count": 69,
+    "number": 44585,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "model: Add DEIMv2 to Transformers",
-    "updated_at": "2026-03-24T13:42:36Z"
+    "title": "Fix missing rms_norm_eps in DeepseekV3 MLA layernorms",
+    "updated_at": "2026-03-12T14:39:12Z"
   },
   {
-    "additions": 3641,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR rework a bit how distributed tests are tested. I tried to keep some of the existing tests and added new tests also. For each of these distributed methods (ddp, fsdp, deepspeeed), we have some common tests li\u2026",
-    "changed_files": 38,
+    "additions": 1,
+    "author": "mvanhorn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes an off-by-one error in `decode_spans()` in the document question answering pipeline that causes a `ValueError: kth(=N) out of bounds` crash when `len(scores_flat) == topk`. The boundary check on line 97 uses `\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44338",
-    "created_at": "2026-02-27T17:50:16Z",
-    "deletions": 3762,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44584",
+    "created_at": "2026-03-10T23:52:51Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44338/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44338",
+    "files_url": "https://github.com/huggingface/transformers/pull/44584/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44584",
     "labels": [],
     "merged": true,
-    "number": 44338,
+    "number": 44584,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update distributed tests",
-    "updated_at": "2026-03-05T23:35:36Z"
+    "title": "Fix off-by-one in decode_spans boundary check",
+    "updated_at": "2026-03-12T13:22:10Z"
   },
   {
-    "additions": 2,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "- moves `kernels-community/flash-attn2:FlashAttention2` to `from_pretrained(attn_implementation...)` - fix error message for registering a kernel",
-    "changed_files": 2,
+    "additions": 45,
+    "author": "wilnn",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "\u2026kpoint when `save_strategy` is `best` # What does this PR do? fix load_best_model_checkpoint_at_end do not load the best model checkpoint at the end when `save_strategy` is `\"best\"` Fixes # (issue) fix load_best_model_checkpoint_at_end do\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44337",
-    "created_at": "2026-02-27T17:36:54Z",
-    "deletions": 2,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44583",
+    "created_at": "2026-03-10T22:37:36Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44337/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44337",
+    "files_url": "https://github.com/huggingface/transformers/pull/44583/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44583",
     "labels": [],
     "merged": true,
-    "number": 44337,
-    "review_comments_count": 0,
+    "number": 44583,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[docs] kernelconfig fix",
-    "updated_at": "2026-02-27T22:46:30Z"
+    "title": "fix load_best_model_checkpoint_at_end do not load the best model chec\u2026",
+    "updated_at": "2026-03-24T15:42:26Z"
   },
   {
-    "additions": 57,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the RoFormer model to use the `@capture_outputs` and `@can_return_tuple` decorators, following the established pattern (similar to #44047 for Bloom, #44151 for BioGPT, etc.). ### Changes: - **`RoFormerMod\u2026",
+    "additions": 3,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix an issue introduced in #42564 . The refactor embedded raw image tokens instead of BPE tokens, causing the model to output gibberish. This fix adds back the image tokens to BPE tokens conversion before embedding.\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44335",
-    "created_at": "2026-02-27T17:23:01Z",
-    "deletions": 172,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44582",
+    "created_at": "2026-03-10T21:00:30Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44335/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44335",
+    "files_url": "https://github.com/huggingface/transformers/pull/44582/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44582",
     "labels": [],
-    "merged": false,
-    "number": 44335,
+    "merged": true,
+    "number": 44582,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor RoFormer output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:29:59Z"
+    "title": "Fix missing BPE token conversion step in Chameleon",
+    "updated_at": "2026-03-11T11:26:49Z"
   },
   {
-    "additions": 1,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I had some issues with running `transformers-cli add-new-model-like`. This PR fixes it. Fixes #44661.",
-    "changed_files": 1,
+    "additions": 9,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 **Llama-4 Vision:** [freqs_ci is stored as a plain attr](https://github.com/huggingface/transformers/blob/153894c013/src/transformers/models/llama4/mode\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44334",
-    "created_at": "2026-02-27T17:13:44Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44581",
+    "created_at": "2026-03-10T19:33:51Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44334/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44334",
+    "files_url": "https://github.com/huggingface/transformers/pull/44581/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44581",
     "labels": [],
     "merged": true,
-    "number": 44334,
-    "review_comments_count": 0,
+    "number": 44581,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix CookieCutter",
-    "updated_at": "2026-03-13T17:13:28Z"
+    "title": "fix(models, testing): Fix Llama4 vision rotary meta tensor initialization and MyT5 get_tokenizer signature",
+    "updated_at": "2026-03-13T16:13:09Z"
   },
   {
-    "additions": 13,
-    "author": "ManasVardhan",
+    "additions": 16,
+    "author": "rabbierabbie",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the ALBERT model to use named attribute access instead of index-based access on model outputs, and removes redundant `return_dict=True` arguments from inner model calls (already handled by `@capture_outpu\u2026",
+    "body_excerpt": "This PR clarifies references to the **Transformers library** in the README. While reading the documentation as a new user, I initially found the term **\"Transformers\"** ambiguous because it could refer either to the **Transformer architect\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44333",
-    "created_at": "2026-02-27T17:12:25Z",
-    "deletions": 18,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44580",
+    "created_at": "2026-03-10T18:21:55Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44333/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44333",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44580/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44580",
+    "labels": [],
     "merged": false,
-    "number": 44333,
+    "number": 44580,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ALBERT to use named attributes and remove redundant return_dict=True",
-    "updated_at": "2026-03-02T13:05:54Z"
+    "title": "Clarify references to the Transformers library in README",
+    "updated_at": "2026-03-11T13:24:58Z"
   },
   {
-    "additions": 3,
-    "author": "tysoncung",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fix minor typos found in comments and docstrings: - `orignal` \u2192 `original` in `src/transformers/integrations/peft.py` (lines 245, 284) - Duplicate word `is is` \u2192 `is` in `src/transformers/models/dia/processing_dia.py` (line 89) Small clean\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? We need to fetch the specific commit (the so called merge commit created by Github itself)",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44332",
-    "created_at": "2026-02-27T16:11:46Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44579",
+    "created_at": "2026-03-10T16:58:09Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44332/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44332",
+    "files_url": "https://github.com/huggingface/transformers/pull/44579/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44579",
     "labels": [],
     "merged": true,
-    "number": 44332,
+    "number": 44579,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix typos in comments and docstrings",
-    "updated_at": "2026-02-27T18:02:59Z"
+    "title": "Fix PR comment CI for quantization job",
+    "updated_at": "2026-03-10T17:07:11Z"
   },
   {
-    "additions": 33,
-    "author": "kashif",
+    "additions": 449,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? fixed the bfloat16 dtype mismatch and Loss computation shape mismatch. Also added tests for these. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to ap\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# Summary This PR adds three attributes to the compile config, to have granularity over how varlen (handles mixed prefil and decode batches) and decode (only decode batches) are compiled. We want to have this kind of granularity because va\u2026",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44331",
-    "created_at": "2026-02-27T15:46:08Z",
-    "deletions": 10,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44578",
+    "created_at": "2026-03-10T16:31:20Z",
+    "deletions": 121,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44331/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44331",
+    "files_url": "https://github.com/huggingface/transformers/pull/44578/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44578",
     "labels": [],
     "merged": true,
-    "number": 44331,
-    "review_comments_count": 9,
+    "number": 44578,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "[timesfm2_5] fix timesfm2.5 loss",
-    "updated_at": "2026-03-03T17:22:56Z"
+    "title": "[CB] Better parametrization for compile",
+    "updated_at": "2026-03-19T11:50:08Z"
   },
   {
-    "additions": 289,
-    "author": "Cyrilvallez",
+    "additions": 5,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title! Follow-up of https://github.com/huggingface/transformers/pull/44181 with more models!",
-    "changed_files": 136,
+    "body_excerpt": "- Fix quantizer_aqlm.py to use renamed modules_to_not_convert parameter instead of removed linear_weights_not_to_quantize - Update test to match new function signature: no tuple return, module names instead of weight names",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44330",
-    "created_at": "2026-02-27T15:33:02Z",
-    "deletions": 1682,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44577",
+    "created_at": "2026-03-10T15:57:36Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44330/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44330",
+    "files_url": "https://github.com/huggingface/transformers/pull/44577/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44577",
     "labels": [],
     "merged": true,
-    "number": 44330,
-    "review_comments_count": 13,
+    "number": 44577,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove `cache_position` in more models",
-    "updated_at": "2026-03-11T14:47:50Z"
+    "title": "Fix: AQLM quantizer to match updated replace_with_aqlm_linear signature",
+    "updated_at": "2026-03-10T17:48:00Z"
   },
   {
-    "additions": 3,
-    "author": "linfeng-du",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 16,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44387. This PR disable async loading when we want to quantize the model. it is actually faster than doing a semaphore. If a quantizer happens to quantize fast\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44329",
-    "created_at": "2026-02-27T15:27:39Z",
-    "deletions": 3,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44576",
+    "created_at": "2026-03-10T15:07:01Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44329/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44329",
+    "files_url": "https://github.com/huggingface/transformers/pull/44576/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44576",
     "labels": [],
     "merged": true,
-    "number": 44329,
-    "review_comments_count": 4,
+    "number": 44576,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Enable Liger Kernel when doing hyperparameter search.",
-    "updated_at": "2026-03-03T13:44:56Z"
+    "title": "Disable async loading when quantizing on the fly",
+    "updated_at": "2026-03-16T16:36:42Z"
   },
   {
-    "additions": 92,
-    "author": "SunMarc",
+    "additions": 13,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds guidelines for agents when it comes to add/run trainer tests. This needs to be updated as we modify, refactor the code !",
+    "body_excerpt": "Some parameters in Tapas are initialized in `__init__()` and not reinitialized in `_init_weights()`, which means that if the model is created on the `meta` device, those parameters do not get a weight initialization. This causes a crash la\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44328",
-    "created_at": "2026-02-27T15:17:24Z",
-    "deletions": 0,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44575",
+    "created_at": "2026-03-10T14:42:40Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44328/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44328",
+    "files_url": "https://github.com/huggingface/transformers/pull/44575/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44575",
     "labels": [],
     "merged": true,
-    "number": 44328,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Add testing guide for agents for trainer tests",
-    "updated_at": "2026-02-27T17:32:11Z"
-  },
-  {
-    "additions": 38,
-    "author": "overcastbulb",
-    "author_association": "NONE",
-    "body_excerpt": "Adds missing pipeline tutorial example for zero-shot-classification following the existing format of other task examples in pipeline_tutorial.md. Related: #18926",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44326",
-    "created_at": "2026-02-27T14:37:19Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44326/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44326",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44326,
+    "number": 44575,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add zero-shot-classification example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:46:24Z"
+    "title": "Correct Tapas initialization",
+    "updated_at": "2026-03-10T15:14:37Z"
   },
   {
-    "additions": 4,
-    "author": "kashif",
+    "additions": 33,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The `CLIPMLP` has the bias set to True but timesFM 2.5 uses `bias=False` in the pretrained model <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appe\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44325",
-    "created_at": "2026-02-27T13:18:40Z",
-    "deletions": 2,
+    "body_excerpt": "Fixes #44448",
+    "changed_files": 4,
+    "cluster_id": "cluster-44053-8",
+    "cluster_ids": [
+      "cluster-44053-8"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44571",
+    "created_at": "2026-03-10T14:28:22Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44325/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44325",
+    "files_url": "https://github.com/huggingface/transformers/pull/44571/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44571",
     "labels": [],
     "merged": true,
-    "number": 44325,
+    "number": 44571,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[timesfm2_5] fix timesfm mlp bias",
-    "updated_at": "2026-02-27T13:36:13Z"
+    "title": "Fix pegasus conversion",
+    "updated_at": "2026-03-18T09:55:00Z"
   },
   {
-    "additions": 16,
-    "author": "tonglei19961121",
-    "author_association": "NONE",
-    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. This PR addresses the first TODO item in #18926. Changes: - Added document-question-answering task example to pipeline_tutori\u2026",
+    "additions": 30,
+    "author": "umbilnm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? In transformers v5, `DebertaV2Tokenizer` was rewritten to use `TokenizersBackend`, but the `post_processor` responsible for adding `[CLS]`/`[SEP]` tokens was never set. This causes `add_special_tokens=True` to silen\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44324",
-    "created_at": "2026-02-27T12:29:06Z",
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44570",
+    "created_at": "2026-03-10T13:37:06Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44324/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44324",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44324,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44570/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44570",
+    "labels": [],
+    "merged": true,
+    "number": 44570,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "docs: Add document-question-answering example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:35:23Z"
+    "title": "Fix missing post_processor in DebertaV2Tokenizer causing no special t\u2026",
+    "updated_at": "2026-03-24T09:40:44Z"
   },
   {
-    "additions": 16,
-    "author": "tonglei19961121",
-    "author_association": "NONE",
-    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. Fixes #18926",
-    "changed_files": 2,
+    "additions": 267,
+    "author": "aashay-sarvam",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds native support for the `sarvam_mla` model type (`sarvamai/sarvam-105b`) to HuggingFace Transformers using the **modular pattern**, inheriting from DeepSeek V3. ### Model Architecture SarvamMLA is a **105B para\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44323",
-    "created_at": "2026-02-27T12:26:00Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44323/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44323",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44323,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs: Add document-question-answering example to pipeline tutorial",
-    "updated_at": "2026-02-27T14:38:43Z"
-  },
-  {
-    "additions": 12,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@vasqu This PR skipped 2 invalid test cases: ``` tests/models/voxtral_realtime/test_modeling_voxtral_realtime.py::VoxtralRealtimeForConditionalGenerationModelTest::test_generate_with_quant_cache tests/models/voxtral_realtime/test_modeling_\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44321",
-    "created_at": "2026-02-27T09:54:14Z",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44569",
+    "created_at": "2026-03-10T11:55:01Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44321/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44321",
+    "files_url": "https://github.com/huggingface/transformers/pull/44569/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44569",
     "labels": [],
-    "merged": true,
-    "number": 44321,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "skip 2 invalid test cases for voxtral_realtime model",
-    "updated_at": "2026-03-02T12:09:10Z"
+    "merged": false,
+    "number": 44569,
+    "review_comments_count": 20,
+    "state": "open",
+    "title": "Add SarvamMLA model (sarvamai/sarvam-105b)",
+    "updated_at": "2026-03-18T13:58:18Z"
   },
   {
-    "additions": 6038,
-    "author": "NielsRogge",
+    "additions": 2,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds [SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation](https://huggingface.co/papers/2602.12173). Fixes #44205",
-    "changed_files": 13,
+    "body_excerpt": "# What does this PR do? Currently, when using Apertus (or rather, it's `XIELUActivation`), and you don't have `xielu` installed, then you'll fall to this `except:` https://github.com/huggingface/transformers/blob/5a098a1e01034095f037c8a37f\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44320",
-    "created_at": "2026-02-27T08:29:00Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44567",
+    "created_at": "2026-03-10T11:39:20Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44320/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44320",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44567/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44567",
+    "labels": [],
+    "merged": true,
+    "number": 44567,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[`fix`] Prevent crash with Apertus without xielu installed",
+    "updated_at": "2026-03-10T13:24:11Z"
+  },
+  {
+    "additions": 181,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This patch extends `ty` check to `src/transformers/cli` Based on https://github.com/huggingface/transformers/pull/44412",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44566",
+    "created_at": "2026-03-10T10:40:13Z",
+    "deletions": 86,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44566/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44566",
+    "labels": [],
     "merged": false,
-    "number": 44320,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Add SAM3-LiteText",
-    "updated_at": "2026-03-24T19:07:25Z"
+    "number": 44566,
+    "review_comments_count": 21,
+    "state": "closed",
+    "title": "chore(typing): extend typing to `src/transformers/cli` ",
+    "updated_at": "2026-04-01T16:05:57Z"
   },
   {
-    "additions": 74,
-    "author": "IlyasMoutawwakil",
+    "additions": 36,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per the title. The unused memory is taken into account too late, which leads to different device_map for the same hardware and models, and even random cuda OOM!! Basically, the max memory needs to be adjusted BEF\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44319",
-    "created_at": "2026-02-27T08:20:45Z",
-    "deletions": 56,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44565",
+    "created_at": "2026-03-10T10:31:10Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44319/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44319",
+    "files_url": "https://github.com/huggingface/transformers/pull/44565/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44565",
     "labels": [],
     "merged": true,
-    "number": 44319,
-    "review_comments_count": 3,
+    "number": 44565,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Support non-gated experts",
-    "updated_at": "2026-03-02T19:26:38Z"
+    "title": "[device_map] Fix device_map computation by correctly adjusting memory available",
+    "updated_at": "2026-03-10T17:16:01Z"
   },
   {
-    "additions": 10,
-    "author": "yoginlangalia",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Adds input validation for bounding box shape in `LayoutLMv3Tokenizer`. When users pass boxes with fewer (or more) than 4 values per box, the tokenizer now raises a clear `ValueError` instead of a confusing generic\u2026",
-    "changed_files": 1,
+    "additions": 2,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #44360",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44318",
-    "created_at": "2026-02-27T06:40:02Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44564",
+    "created_at": "2026-03-10T10:14:21Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44318/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44318",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44318,
+    "files_url": "https://github.com/huggingface/transformers/pull/44564/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44564",
+    "labels": [],
+    "merged": true,
+    "number": 44564,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Validate bounding box shape in LayoutLMv3Tokenizer",
-    "updated_at": "2026-02-27T14:43:08Z"
+    "title": "Fix glm dsa",
+    "updated_at": "2026-03-19T15:13:36Z"
   },
   {
-    "additions": 4,
-    "author": "sxu75374",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Passes `config.rms_norm_eps` explicitly to `q_a_layernorm` and `kv_a_layernorm` in both DeepSeek V2 and V3 MLA attention. Currently these two norms are constructed without `eps`, falling back to the `RMSNorm` class\u2026",
-    "changed_files": 2,
+    "additions": 165,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Split out `mm_token_type_id` creation to a separate utility and just call it in VLMs. Also make sure that `mm_token_type_id` can be created even when `padding=False` and the inputs are of different length. As long a\u2026",
+    "changed_files": 37,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44317",
-    "created_at": "2026-02-27T04:48:08Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44563",
+    "created_at": "2026-03-10T10:13:29Z",
+    "deletions": 267,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44317/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44317",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44317,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44563/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44563",
+    "labels": [],
+    "merged": true,
+    "number": 44563,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix(deepseek): pass rms_norm_eps to MLA q/kv layernorms",
-    "updated_at": "2026-02-27T14:30:04Z"
+    "title": "Allow `mm_token_type` be non-padded lists ",
+    "updated_at": "2026-03-25T11:33:46Z"
   },
   {
-    "additions": 2,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44303 The weight loading progress bar called `pbar.refresh()` on every single parameter, bypassing tqdm's built-in rate-limiting. When output is redirected to a log file (e.g. in CI), this produced one line per parameter -- hundreds\u2026",
+    "additions": 54,
+    "author": "SamArun28",
+    "author_association": "NONE",
+    "body_excerpt": "#Standardizing the BERT model card as part of issue #36979 Changes made: - Added friendly description of BERT - Added Pipeline and AutoModel code examples - Added Notes section with helpful tips - Added Resources section with links @stevhl\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44316",
-    "created_at": "2026-02-27T03:08:28Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44562",
+    "created_at": "2026-03-10T09:58:19Z",
+    "deletions": 111,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44316/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44316",
+    "files_url": "https://github.com/huggingface/transformers/pull/44562/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44562",
     "labels": [],
     "merged": false,
-    "number": 44316,
+    "number": 44562,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Reduce tqdm verbosity during weight loading",
-    "updated_at": "2026-03-03T17:02:34Z"
+    "title": "docs: standardize BERT model card",
+    "updated_at": "2026-03-10T16:00:43Z"
   },
   {
-    "additions": 2400,
-    "author": "jp1924",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hello, Transformers team! I submitted a PR to add naver-hyperclovax/HyperCLOVAX-SEED-Think-32B (hereafter HCX), developed by the Korean IT company Naver while executing the government's national AI model project. Th\u2026",
-    "changed_files": 17,
+    "additions": 368,
+    "author": "rain-1",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "> Adds support for the legacy text completions endpoint, which accepts a freeform text prompt (no chat template) and returns generated text in choices[].text. Supports both streaming and non-streaming modes, suffix for fill-in-the-middle i\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44314",
-    "created_at": "2026-02-27T02:01:28Z",
-    "deletions": 6,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44558",
+    "created_at": "2026-03-10T07:09:07Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44314/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44314",
+    "files_url": "https://github.com/huggingface/transformers/pull/44558/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44558",
     "labels": [],
     "merged": false,
-    "number": 44314,
-    "review_comments_count": 74,
+    "number": 44558,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "add HyperClovaX Vision",
-    "updated_at": "2026-03-26T07:05:48Z"
+    "title": "Add /v1/completions endpoint (OpenAI legacy completions API) to `transformers serve`",
+    "updated_at": "2026-04-06T09:23:32Z"
   },
   {
-    "additions": 4,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes the same `TypeError: AddedToken() got multiple values for keyword argument 'special'` that #44281 addressed, but for the `extra_special_tokens` code path which was missed. #44281 (commit 8e663c7) correctly added `value.pop(\"special\",\u2026",
+    "additions": 1,
+    "author": "black-yt",
+    "author_association": "NONE",
+    "body_excerpt": "This PR fixes a runtime `TypeError` encountered during model initialization when using Qwen3.5 configurations with recent `transformers` versions. The error occurs in `modeling_rope_utils.py` during RoPE parameter validation: ``` TypeError\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44313",
-    "created_at": "2026-02-27T01:37:45Z",
+    "comments_count": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44555",
+    "created_at": "2026-03-10T01:52:18Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44313/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44313",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44555/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44555",
+    "labels": [],
     "merged": false,
-    "number": 44313,
+    "number": 44555,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AddedToken duplicate 'special' kwarg for extra_special_tokens",
-    "updated_at": "2026-02-27T14:26:28Z"
+    "title": "Fix TypeError in RoPE validation when ignore_keys_at_rope_validation is a list",
+    "updated_at": "2026-03-28T10:04:35Z"
   },
   {
-    "additions": 8,
-    "author": "haosenwang1018",
-    "author_association": "NONE",
-    "body_excerpt": "Replace bare `except:` clauses with `except Exception:` for PEP 8 compliance.",
-    "changed_files": 4,
+    "additions": 233,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, wip and really unsure if we really want this",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44312",
-    "created_at": "2026-02-27T01:00:33Z",
-    "deletions": 8,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44312/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44312",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44553",
+    "created_at": "2026-03-10T01:04:49Z",
+    "deletions": 263,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44553/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44553",
     "labels": [],
     "merged": false,
-    "number": 44312,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: replace 8 bare except clauses with except Exception",
-    "updated_at": "2026-02-27T03:27:27Z"
+    "number": 44553,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "[`FA`] Refactor FA CB kwargs",
+    "updated_at": "2026-03-17T09:14:21Z"
   },
   {
-    "additions": 38,
-    "author": "onel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds species bias documentation across the transformers repository to help model authors and users recognize and address potential biases in language models. The updates include guidance on documenting bias categori\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "Reverts huggingface/transformers#44529",
+    "changed_files": 1,
+    "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44311",
-    "created_at": "2026-02-27T00:02:49Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44552",
+    "created_at": "2026-03-09T21:05:46Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44311/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44311",
+    "files_url": "https://github.com/huggingface/transformers/pull/44552/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44552",
     "labels": [],
-    "merged": false,
-    "number": 44311,
+    "merged": true,
+    "number": 44552,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add species bias documentation to model cards and docs",
-    "updated_at": "2026-02-27T14:09:20Z"
+    "title": "Revert \"test merge queue 1\"",
+    "updated_at": "2026-03-09T21:15:55Z"
   },
   {
-    "additions": 63,
-    "author": "onel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds documentation for Pruna AI integration to the Transformers ecosystem, following the existing pattern used by vLLM and Unsloth integration docs. ## Changes - Created `docs/source/en/community_integrations/pruna.\u2026",
+    "additions": 12,
+    "author": "echarlaix",
+    "author_association": "MEMBER",
+    "body_excerpt": "`self.rotary_emb` is always called since https://github.com/huggingface/transformers/pull/39847 while only being initialized when `config.use_mem_rope` is True inference failing since v5 for models `config.use_mem_rope=False` ``` Attribute\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44310",
-    "created_at": "2026-02-27T00:00:31Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44551",
+    "created_at": "2026-03-09T18:15:17Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44310/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44310",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44310,
+    "files_url": "https://github.com/huggingface/transformers/pull/44551/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44551",
+    "labels": [],
+    "merged": true,
+    "number": 44551,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add Pruna AI integration documentation",
-    "updated_at": "2026-02-27T14:08:21Z"
+    "title": "Fix zamba2 rotary embedding call when use_mem_rope is False",
+    "updated_at": "2026-03-10T11:43:36Z"
   },
   {
-    "additions": 129,
-    "author": "onel",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds SkyPilot deployment documentation to the DeepSpeed guide. The new section includes: - Introduction to SkyPilot as a unified framework for running AI workloads across clouds and Kubernetes - Complete example YAM\u2026",
+    "additions": 6,
+    "author": "himani27301",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Improved wording and grammar in the Auto Classes documentation to enhance readability without changing functionality. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, you\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44309",
-    "created_at": "2026-02-26T22:44:41Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44309/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44309",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44309,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add SkyPilot deployment documentation to DeepSpeed guide",
-    "updated_at": "2026-02-27T14:09:55Z"
-  },
-  {
-    "additions": 5854,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44308",
-    "created_at": "2026-02-26T21:26:05Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44550",
+    "created_at": "2026-03-09T16:12:59Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44308/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44308",
+    "files_url": "https://github.com/huggingface/transformers/pull/44550/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44550",
     "labels": [],
     "merged": false,
-    "number": 44308,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Codex/add sam3 litetext model to transformers fuvllg",
-    "updated_at": "2026-02-26T21:35:44Z"
+    "number": 44550,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "Improve clarity and grammar in Auto Classes documentation",
+    "updated_at": "2026-03-09T16:32:29Z"
   },
   {
-    "additions": 7,
-    "author": "imstevenpmwork",
+    "additions": 158,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes a `TypeError: not all arguments converted during string formatting` caused by incorrectly passing `FutureWarning` as a second argument to `logger.warning_once()` in this file, introduced in https://git\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? The previous values are simply wrong. (which also cause problems for tiny model creation)",
+    "changed_files": 51,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44307",
-    "created_at": "2026-02-26T20:09:20Z",
-    "deletions": 10,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44549",
+    "created_at": "2026-03-09T15:44:36Z",
+    "deletions": 126,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44307/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44307",
+    "files_url": "https://github.com/huggingface/transformers/pull/44549/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44549",
     "labels": [],
     "merged": true,
-    "number": 44307,
-    "review_comments_count": 2,
+    "number": 44549,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(modeling_attn_mask_utils): remove FutureWarning from logger.warning_once()",
-    "updated_at": "2026-02-26T21:29:01Z"
+    "title": "Fix wrong (non-existing) checkpoints",
+    "updated_at": "2026-03-11T15:16:56Z"
   },
   {
-    "additions": 10,
-    "author": "jashshah999",
+    "additions": 12,
+    "author": "mvanhorn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a crash in `PretrainedConfig.update_from_string()` when the input string contains entries without `=` or with multiple `=` signs. **`configuration_utils.py`** - The existing code `dict(x.split(\"=\") for x in up\u2026",
+    "body_excerpt": "Fixes #43935 ## Summary - Added `eval_on_end` argument to `TrainingArguments` (default: `False`) - Added conditional evaluation at the end of training in `Trainer.train()`, symmetric to `eval_on_start` - Implementation mirrors the existing\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44306",
-    "created_at": "2026-02-26T20:02:10Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44548",
+    "created_at": "2026-03-09T15:03:27Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44306/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44306",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44548/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44548",
+    "labels": [],
     "merged": false,
-    "number": 44306,
+    "number": 44548,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix ValueError crash in PretrainedConfig.update_from_string on malformed input",
-    "updated_at": "2026-02-27T14:25:03Z"
+    "title": "Add eval_on_end flag to Trainer",
+    "updated_at": "2026-03-09T19:31:59Z"
   },
   {
-    "additions": 7,
-    "author": "jashshah999",
+    "additions": 2,
+    "author": "mvanhorn",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a mutable default argument and two resource leaks: 1. **`integrations/tpu.py`** - `patched_optimizer_step` used `optimizer_args={}` as a default parameter. Mutable defaults are shared across calls, so any muta\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Fixes #44373 ## Summary - Corrected the docstring for `position_ids` parameter in `prepare_fa_kwargs_from_position_ids` and `_prepare_from_posids` which incorrectly described attention mask semantics (\"Boolean or int tensor... 1 means vali\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44305",
-    "created_at": "2026-02-26T19:22:33Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44547",
+    "created_at": "2026-03-09T14:59:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44305/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44305",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44547/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44547",
+    "labels": [],
     "merged": false,
-    "number": 44305,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix mutable default in TPU optimizer and unclosed file handles",
-    "updated_at": "2026-02-27T14:21:58Z"
+    "number": 44547,
+    "review_comments_count": 3,
+    "state": "open",
+    "title": "Fix position_ids docstring in modeling_flash_attention_utils.py",
+    "updated_at": "2026-03-09T16:08:29Z"
   },
   {
-    "additions": 151,
-    "author": "adil-a",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds NeMo Automodel under the community integrations tab. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contribut\u2026",
-    "changed_files": 3,
+    "additions": 4,
+    "author": "Abdennacer-Badaoui",
+    "author_association": "MEMBER",
+    "body_excerpt": "The AMD Docker image build (`latest-pytorch-amd`) has been failing since early February due to the 6h GitHub Actions job time limit being exceeded. The root cause is that Flash Attention is cloned and compiled from the latest commit on the\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44304",
-    "created_at": "2026-02-26T17:57:16Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44546",
+    "created_at": "2026-03-09T14:15:51Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44304/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44304",
+    "files_url": "https://github.com/huggingface/transformers/pull/44546/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44546",
     "labels": [],
     "merged": true,
-    "number": 44304,
-    "review_comments_count": 9,
+    "number": 44546,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: Add NeMo Automodel community integration docs",
-    "updated_at": "2026-03-03T16:51:48Z"
+    "title": "Fix AMD Docker image build timeout by pinning Flash Attention commit",
+    "updated_at": "2026-03-09T14:37:50Z"
   },
   {
-    "additions": 28,
-    "author": "michaelbenayoun",
+    "additions": 18,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Integrate the neuron device to TrainingArguments. It enables using the neuron device with the `Trainer` class.",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44336",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44302",
-    "created_at": "2026-02-26T15:11:09Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44544",
+    "created_at": "2026-03-09T11:38:09Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44302/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44302",
+    "files_url": "https://github.com/huggingface/transformers/pull/44544/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44544",
     "labels": [],
     "merged": true,
-    "number": 44302,
+    "number": 44544,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Integrate the Neuron device to TrainingArguments",
-    "updated_at": "2026-03-05T15:11:00Z"
+    "title": "Fix ansi codes in loading reports when not connected to terminal",
+    "updated_at": "2026-03-09T11:52:16Z"
   },
   {
-    "additions": 30,
-    "author": "likejazz",
+    "additions": 154,
+    "author": "umbilnm",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? When fine-tuning Qwen3 with frameworks like TRL, `<think>` blocks are silently omitted from the token sequence, causing chain-of-thought reasoning data to be completely lost during training with no error or warning\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes #44521 `apply_chat_template` with `return_assistant_tokens_mask=True` returns all-zero masks when multimodal inputs (images/videos) are present. ## Root cause `generation_indices` (character-level positions of\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44301",
-    "created_at": "2026-02-26T14:30:24Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44543",
+    "created_at": "2026-03-09T10:47:05Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44301/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44301",
+    "files_url": "https://github.com/huggingface/transformers/pull/44543/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44543",
     "labels": [],
     "merged": false,
-    "number": 44301,
+    "number": 44543,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix: Qwen3 `<think>` blocks not written during fine-tuning (TRL)",
-    "updated_at": "2026-03-02T17:18:03Z"
-  },
-  {
-    "additions": 2539,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The recursive feature is needed for me in https://github.com/huggingface/transformers/pull/44252 to allow timm backbone define its conversion only once. Also it currently allows to delete \"t5gemma2\" from conversion,\u2026",
-    "changed_files": 18,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 43,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44300",
-    "created_at": "2026-02-26T14:09:59Z",
-    "deletions": 470,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44300/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44300",
-    "labels": [],
-    "merged": false,
-    "number": 44300,
-    "review_comments_count": 45,
     "state": "open",
-    "title": "Dynamic weight conversion is recursive",
-    "updated_at": "2026-03-25T16:16:08Z"
+    "title": "Fix assistant_masks for multimodal inputs in apply_chat_template",
+    "updated_at": "2026-03-09T10:47:05Z"
   },
   {
-    "additions": 520,
-    "author": "vasqu",
+    "additions": 3,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, to allow for proper alignment with vllm/sglang Closes #44258",
-    "changed_files": 21,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44299",
-    "created_at": "2026-02-26T12:50:03Z",
-    "deletions": 282,
+    "body_excerpt": "# What does this PR do? As per the title",
+    "changed_files": 2,
+    "cluster_id": "cluster-44053-8",
+    "cluster_ids": [
+      "cluster-44053-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44542",
+    "created_at": "2026-03-09T10:00:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44299/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44299",
+    "files_url": "https://github.com/huggingface/transformers/pull/44542/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44542",
     "labels": [],
     "merged": true,
-    "number": 44299,
-    "review_comments_count": 4,
+    "number": 44542,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": ":rotating_light: [`Ernie 4.5 VL Moe`] Fix up namings to vllm/sglang convention",
-    "updated_at": "2026-02-26T16:42:50Z"
+    "title": "Fix backend dependency",
+    "updated_at": "2026-03-09T10:10:24Z"
   },
   {
-    "additions": 1145,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A few issues we did not catch: - https://github.com/huggingface/transformers/blob/47b0e478f324b54f177ea7998a0791870fdd0324/src/transformers/convert_slow_tokenizer.py#L1314-L1315 missing from `GemmaTokenier` - SPM's\u2026",
-    "changed_files": 7,
+    "additions": 1,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title, an error occurs when `tp_plan` is empty due to [here](https://github.com/huggingface/transformers/blob/701628527ae1ef37473f05f5d94fac7f457a3f8f/src/transformers/core_model_loading.py#L1120): ``` [rank0]: Traceback (most rece\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44298",
-    "created_at": "2026-02-26T12:34:38Z",
-    "deletions": 29,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44298/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44298",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44540",
+    "created_at": "2026-03-09T09:37:36Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44540/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44540",
     "labels": [],
-    "merged": false,
-    "number": 44298,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "Auto detect wrong mapping models",
-    "updated_at": "2026-03-02T10:13:28Z"
+    "merged": true,
+    "number": 44540,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix UnboundLocalError for tp_plan_alt when tp_plan is empty",
+    "updated_at": "2026-03-11T13:42:39Z"
   },
   {
-    "additions": 12302,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Needed for https://github.com/huggingface/transformers/pull/41250 to pass the docstring-checker in CI. Our basic checker doesn't handle well dataclasses so we can use `autodocstring`",
-    "changed_files": 512,
+    "additions": 1333,
+    "author": "kmswin1",
+    "author_association": "NONE",
+    "body_excerpt": "A.X K1 \ubaa8\ub378 \ucd94\uac00\ud569\ub2c8\ub2e4. \uae30\uc874 inference \uc640 \ub3d9\uc77c\ud55c \uac83 \ud655\uc778\ud588\uc2b5\ub2c8\ub2e4.",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44296",
-    "created_at": "2026-02-26T10:29:21Z",
-    "deletions": 37860,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44539",
+    "created_at": "2026-03-09T09:03:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44296/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44296",
+    "files_url": "https://github.com/huggingface/transformers/pull/44539/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44539",
     "labels": [],
-    "merged": true,
-    "number": 44296,
-    "review_comments_count": 11,
+    "merged": false,
+    "number": 44539,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add auto-docstring on configs",
-    "updated_at": "2026-03-06T11:58:10Z"
+    "title": "Add A.X K1",
+    "updated_at": "2026-03-09T09:04:11Z"
   },
   {
-    "additions": 1,
-    "author": "mario-sanz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 14,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Follow-up of #44532: we need to change the required status check to the new added job `doc_build_status_check` added in this PR, otherwise the merge queue won't get the required (passing) status and will eventually\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44294",
-    "created_at": "2026-02-26T08:30:52Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44538",
+    "created_at": "2026-03-09T08:59:12Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44294/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44294",
+    "files_url": "https://github.com/huggingface/transformers/pull/44538/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44538",
     "labels": [],
     "merged": true,
-    "number": 44294,
+    "number": 44538,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: use `TokenizersBackend` for Olmo3 to preserve custom `pre_tokenizer`",
-    "updated_at": "2026-02-26T10:35:44Z"
+    "title": "Add a new job in `build_pr_documentation.yml` (will be the new required job)",
+    "updated_at": "2026-03-09T09:12:25Z"
   },
   {
-    "additions": 13,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 15,
+    "additions": 2,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title. Supplement skip logic for XPU in the CPU-only tp tests. Hi @IlyasMoutawwakil, please help review, thanks!",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44293",
-    "created_at": "2026-02-26T08:25:23Z",
-    "deletions": 384,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44536",
+    "created_at": "2026-03-09T08:09:41Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44293/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44293",
+    "files_url": "https://github.com/huggingface/transformers/pull/44536/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44536",
     "labels": [],
     "merged": true,
-    "number": 44293,
-    "review_comments_count": 7,
+    "number": 44536,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Strict export cleanup",
-    "updated_at": "2026-03-02T09:36:19Z"
+    "title": "Supplement skip logic for XPU in the CPU-only tp tests",
+    "updated_at": "2026-03-09T10:10:49Z"
   },
   {
-    "additions": 516,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "- adds `trainer_recipes.md` to show how to use other practical `Trainer` features outside of the basic training loop - updates hyperparam search docs - updates `optimizers.md` with how to customize it (prebuilt instances, passing a class +\u2026",
-    "changed_files": 6,
+    "additions": 53,
+    "author": "Anakintano",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Problem `Qwen2_5_VLProcessor.apply_chat_template` raises `ValueError: setting an array element with a sequence` when called with a batch of \u22652 conversations that include images under the default `padding=False` setting. **Root cause:**\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44290",
-    "created_at": "2026-02-26T01:02:15Z",
-    "deletions": 210,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44535",
+    "created_at": "2026-03-09T07:54:58Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44290/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44290",
+    "files_url": "https://github.com/huggingface/transformers/pull/44535/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44535",
     "labels": [],
     "merged": false,
-    "number": 44290,
-    "review_comments_count": 17,
+    "number": 44535,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "[docs] optimizers, hyperparam search, training features",
-    "updated_at": "2026-03-18T23:10:48Z"
+    "title": "Fix crash in Qwen2_5_VLProcessor when using batched input with padding=False",
+    "updated_at": "2026-03-09T12:44:00Z"
   },
   {
-    "additions": 8,
-    "author": "haosenwang1018",
+    "additions": 0,
+    "author": "stargazerwh",
     "author_association": "NONE",
-    "body_excerpt": "Replace bare except clauses with except Exception.",
-    "changed_files": 4,
+    "body_excerpt": "## Description The 'transformers run' command was removed in Transformers v5, but the documentation still contained references in 62 model documentation files. This PR removes all `<hfoption id='transformers CLI'>` and `<hfoption id='trans\u2026",
+    "changed_files": 62,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44289",
-    "created_at": "2026-02-26T00:58:35Z",
-    "deletions": 8,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44533",
+    "created_at": "2026-03-09T01:39:32Z",
+    "deletions": 439,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44289/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44289",
+    "files_url": "https://github.com/huggingface/transformers/pull/44533/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44533",
     "labels": [],
     "merged": false,
-    "number": 44289,
+    "number": 44533,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace 8 bare except clauses with except Exception",
-    "updated_at": "2026-02-26T12:52:48Z"
+    "title": "docs: Remove references to removed 'transformers run' CLI command",
+    "updated_at": "2026-03-09T02:33:08Z"
   },
   {
-    "additions": 1,
-    "author": "somAzzz",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? When loading the new Qwen 3.5 models (e.g., 'Qwen/Qwen3.5-35B-A3B') using the 'transformers' (5.3.0.dev0), the initialization crashes with a 'TypeError' . **Error Traceback Context:** (APIServer pid=98544) File \"...\u2026",
+    "additions": 9,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? So we can use `Require Merge Queue` functionoality",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44288",
-    "created_at": "2026-02-25T22:43:41Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44532",
+    "created_at": "2026-03-08T20:34:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44288/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44288",
+    "files_url": "https://github.com/huggingface/transformers/pull/44532/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44532",
     "labels": [],
-    "merged": false,
-    "number": 44288,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44532,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in modeling_rope_utils.py when ignore_keys_at_rope_vali\u2026",
-    "updated_at": "2026-02-26T20:09:07Z"
+    "title": "Update `build_pr_documentation` workflow for `merge_group` event",
+    "updated_at": "2026-03-08T20:42:57Z"
   },
   {
-    "additions": 11,
-    "author": "jashshah999",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes mutable default arguments and unclosed file handles across several files. **Mutable defaults** (can cause shared state across calls): - `debug_utils.py`: `DebugUnderflowOverflow.__init__` `trace_batch_nums=[]`\u2026",
-    "changed_files": 4,
+    "additions": 16,
+    "author": "s-zx",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `Qwen2_5_VLProcessor.apply_chat_template` crashing with a `ValueError` when called with a batch of conversations with different prompt lengths and `padding=False` (the default). ### Root cause In the `mm_toke\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44287",
-    "created_at": "2026-02-25T22:23:20Z",
-    "deletions": 7,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44531",
+    "created_at": "2026-03-08T19:38:00Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44287/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44287",
+    "files_url": "https://github.com/huggingface/transformers/pull/44531/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44531",
     "labels": [],
-    "merged": true,
-    "number": 44287,
+    "merged": false,
+    "number": 44531,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix mutable default arguments and resource leaks",
-    "updated_at": "2026-03-02T15:17:25Z"
+    "title": "Fix Qwen2_5_VLProcessor.apply_chat_template crash on unpadded batched input",
+    "updated_at": "2026-03-09T13:14:02Z"
   },
   {
-    "additions": 31,
-    "author": "kathrynle20",
+    "additions": 3,
+    "author": "ydshieh2",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds on to the Fouroversix integration by adding support for quantized models such as the gpt-oss model by adding weight conversions and an additional config argument. Reference: https://github.com/huggingfa\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44286",
-    "created_at": "2026-02-25T22:15:15Z",
-    "deletions": 14,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44529",
+    "created_at": "2026-03-08T18:12:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44286/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44286",
+    "files_url": "https://github.com/huggingface/transformers/pull/44529/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44529",
     "labels": [],
     "merged": true,
-    "number": 44286,
-    "review_comments_count": 12,
+    "number": 44529,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add future model support for Fouroversix",
-    "updated_at": "2026-03-04T16:28:13Z"
+    "title": "test merge queue 1",
+    "updated_at": "2026-03-09T21:01:09Z"
   },
   {
-    "additions": 3484,
-    "author": "NielsRogge",
+    "additions": 3,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds the VidEoMT model, as described in [VidEoMT: Your ViT is Secretly Also a Video Segmentation Model](https://huggingface.co/papers/2602.17807). Gradio demo (running on ZeroGPU): https://huggingface.co/spa\u2026",
-    "changed_files": 17,
+    "body_excerpt": "# What does this PR do? Merge queue is enabled, but we need to see if it works well with CircleCI",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 23,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44285",
-    "created_at": "2026-02-25T19:24:39Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44528",
+    "created_at": "2026-03-08T17:54:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44285/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44285",
-    "labels": [
-      "New model"
+    "files_url": "https://github.com/huggingface/transformers/pull/44528/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44528",
+    "labels": [],
+    "merged": false,
+    "number": 44528,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "test merge queue 1",
+    "updated_at": "2026-03-09T20:39:15Z"
+  },
+  {
+    "additions": 3,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes these failing [MusicgenStereoIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500451700#step:14:7870) ## Before submitting - [ ] This PR fixes a typo or improves the\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
     ],
+    "cluster_role": "canonical",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44527",
+    "created_at": "2026-03-08T14:26:02Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44527/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44527",
+    "labels": [],
     "merged": true,
-    "number": 44285,
-    "review_comments_count": 57,
+    "number": 44527,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add VidEoMT",
-    "updated_at": "2026-03-25T17:05:47Z"
+    "title": "Fix failing `MusicgenStereo` integration tests",
+    "updated_at": "2026-03-10T12:28:39Z"
   },
   {
-    "additions": 388,
-    "author": "paipeline",
-    "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #44242 where Mixtral models do not compute auxiliary load balancing loss when `output_router_logits=False`, even when `router_aux_loss_coef > 0`. ## Problem According to the [Mixtral documentation](https://huggingface.\u2026",
-    "changed_files": 4,
+    "additions": 90,
+    "author": "JoursBleu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Add GGUF loading support for MiniMax-M2.1 (456B MoE) model. MiniMax-M2.1 is a large Mixture-of-Experts model with 456B total parameters (45.9B active), 256 experts and 8 experts per token. This PR enables loading it\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44284",
-    "created_at": "2026-02-25T18:38:15Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44526",
+    "created_at": "2026-03-08T09:57:38Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44284/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44284",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44284,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44526/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44526",
+    "labels": [],
+    "merged": true,
+    "number": 44526,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix Mixtral auxiliary loss computation when output_router_logits=False",
-    "updated_at": "2026-02-26T12:41:46Z"
+    "title": "Add GGUF support for MiniMax-M2.1 model",
+    "updated_at": "2026-03-18T14:39:20Z"
   },
   {
     "additions": 1,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Accidentally caused by #43325, wrong naming --> modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main",
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes a `KeyError` crash in `_parse_type_hint` in `chat_template_utils.py` (line 117). When processing Union types, the code accesses `subtype[\"type\"]` without checking the key exists. `_get_json_schema_type(Any)` returns `{}` (\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44283",
-    "created_at": "2026-02-25T18:33:17Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44525",
+    "created_at": "2026-03-08T09:21:27Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44283/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44283",
+    "files_url": "https://github.com/huggingface/transformers/pull/44525/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44525",
     "labels": [],
     "merged": true,
-    "number": 44283,
+    "number": 44525,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Modular`] Fix file type regression",
-    "updated_at": "2026-02-25T20:04:41Z"
+    "title": "Fix KeyError in _parse_type_hint when Union contains Any",
+    "updated_at": "2026-03-09T13:43:23Z"
   },
   {
-    "additions": 5,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.",
+    "additions": 1,
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes a bug in `AssistantTracker.is_active()` in `chat_template_utils.py`. After activation via `activate_tracker()`, `_rendered_blocks` and `_generation_indices` are set to list arguments which may be empty `[]`. The `is_active\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44282",
-    "created_at": "2026-02-25T17:57:54Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44524",
+    "created_at": "2026-03-08T09:21:25Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44282/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44282",
+    "files_url": "https://github.com/huggingface/transformers/pull/44524/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44524",
     "labels": [],
     "merged": true,
-    "number": 44282,
+    "number": 44524,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Restore response_schema saving-loading",
-    "updated_at": "2026-02-25T18:27:22Z"
+    "title": "Fix AssistantTracker.is_active() returning False after activation with empty lists",
+    "updated_at": "2026-03-09T13:36:19Z"
   },
   {
-    "additions": 1,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Its a very small fix for #44062",
+    "additions": 2,
+    "author": "jnMetaCode",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes two small bugs in `load_sharded_checkpoint` in `trainer_utils.py`: **Bug 1 \u2014 Copy-paste error in error message (line 1108):** When reporting unexpected keys, the error message incorrectly says \"Missing key(s)\" instead of \"\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44281",
-    "created_at": "2026-02-25T16:28:37Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44523",
+    "created_at": "2026-03-08T09:21:22Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44281/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44281",
+    "files_url": "https://github.com/huggingface/transformers/pull/44523/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44523",
     "labels": [],
     "merged": true,
-    "number": 44281,
+    "number": 44523,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix special token maps BC",
-    "updated_at": "2026-02-26T10:34:17Z"
+    "title": "Fix error message label and docstring default in load_sharded_checkpoint",
+    "updated_at": "2026-03-10T15:48:41Z"
   },
   {
-    "additions": 614,
-    "author": "RishabhMehra",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
+    "additions": 41,
+    "author": "nakigami",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR introduces initial unit test coverage for the `transformers-cli` tool, specifically focusing on diagnostic and model utility commands. Currently, these CLI entry points lack automated tests. These new tests\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44278",
-    "created_at": "2026-02-25T12:49:56Z",
-    "deletions": 63,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44520",
+    "created_at": "2026-03-08T01:30:39Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44278/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44278",
+    "files_url": "https://github.com/huggingface/transformers/pull/44520/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44520",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44278,
+    "number": 44520,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[FEAT] Pipelines - Faster group_entities",
-    "updated_at": "2026-02-25T13:54:58Z"
+    "title": "test(cli): add unit tests for env and model utility commands",
+    "updated_at": "2026-03-09T13:19:15Z"
   },
   {
-    "additions": 171,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The GLM-ASR integration test in the documentation is a copy of the one in the test suite. This patch removes duplication by: - moving the tests in the docs using `runnables` - see https://github.com/huggingface/doc-\u2026",
-    "changed_files": 10,
+    "additions": 3,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes these failing [MarianIntegrationTests](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500458014#step:14:6186) <img width=\"2378\" height=\"657\" alt=\"image\" src=\"https://github.com/user\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44519",
+    "created_at": "2026-03-07T19:53:23Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44519/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44519",
+    "labels": [],
+    "merged": true,
+    "number": 44519,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix failing `MarianIntegrationTests`",
+    "updated_at": "2026-03-09T14:11:12Z"
+  },
+  {
+    "additions": 12,
+    "author": "KartikPawade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44514 `Qwen2_5_VLProcessor.__call__` crashed with a `ValueError` when processing a batch of conversations with different lengths and `padding=False` (the default). **Root cause:** The `mm_token_type_ids` blo\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 28,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44277",
-    "created_at": "2026-02-25T08:49:20Z",
-    "deletions": 77,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44518",
+    "created_at": "2026-03-07T19:22:40Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44277/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44277",
+    "files_url": "https://github.com/huggingface/transformers/pull/44518/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44518",
     "labels": [],
     "merged": false,
-    "number": 44277,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "Use doc-builder runnable example for GLM-ASR",
-    "updated_at": "2026-03-19T09:01:16Z"
+    "number": 44518,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: Qwen2_5_VLProcessor crashes on batched input when padding=False \u2026",
+    "updated_at": "2026-03-10T18:57:10Z"
   },
   {
-    "additions": 0,
-    "author": "vishalpatil-45",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR addresses the performance regression where `import transformers` takes ~3.5s. The issue was caused by eager imports of heavy backend libraries (like torch/numpy) during the initial module load. By moving the\u2026",
-    "changed_files": 0,
+    "additions": 12637,
+    "author": "ShahVandit",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds Qwen3-TTS, a series of text-to-speech models by the Qwen team (Alibaba Group), to Transformers. **Architecture:** - `Qwen3TTSForConditionalGeneration` \u2014 text to multi-codebook speech codes (talker) - `Qwen3TTS\u2026",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44275",
-    "created_at": "2026-02-25T08:27:32Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44517",
+    "created_at": "2026-03-07T18:48:04Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44275/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44275",
+    "files_url": "https://github.com/huggingface/transformers/pull/44517/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44517",
     "labels": [
-      "Code agent slop"
+      "New model",
+      "Audio"
     ],
     "merged": false,
-    "number": 44275,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Fix] Restore lazy loading to improve import performance (#44273)",
-    "updated_at": "2026-02-25T20:37:18Z"
+    "number": 44517,
+    "review_comments_count": 19,
+    "state": "open",
+    "title": "Add qwen3 tts",
+    "updated_at": "2026-03-27T17:01:05Z"
   },
   {
-    "additions": 559,
-    "author": "paipeline",
+    "additions": 65,
+    "author": "JasonCZMeng",
     "author_association": "NONE",
-    "body_excerpt": "## Description Fixes #44242 This PR resolves an issue where the auxiliary load balancing loss was not computed when `output_router_logits=False`, even when `router_aux_loss_coef != 0`. ## Problem The auxiliary loss computation was incorrec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "## Summary Fix `Qwen2_5_VLProcessor.apply_chat_template` crashing with `ValueError` when called with batched inputs of different sequence lengths (ragged lists) and `padding=False` (the default). Fixes #44514 ## Root Cause The `mm_token_ty\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44274",
-    "created_at": "2026-02-25T06:38:02Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44516",
+    "created_at": "2026-03-07T18:33:40Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44274/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44274",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44516/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44516",
+    "labels": [],
     "merged": false,
-    "number": 44274,
+    "number": 44516,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix auxiliary load balancing loss computation when output_router_logits=False",
-    "updated_at": "2026-02-25T13:36:03Z"
+    "title": "fix(qwen2_5_vl): handle ragged batched input in apply_chat_template",
+    "updated_at": "2026-03-09T13:14:22Z"
   },
   {
     "additions": 1,
-    "author": "hangjun-ezra",
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `TypeError: unsupported operand type(s) for |: 'list' and 'set'` in `RotaryEmbeddingConfigMixin.convert_rope_params_to_dict` when `ignore_keys_at_rope_validation` is a `list` instead of a `set`. ### Root ca\u2026",
+    "body_excerpt": "# What does this PR do? Fixes this failing [GPTNeoModelLanguageGenerationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500455894#step:14:1483) <img width=\"2363\" height=\"239\" alt=\"image\" src=\"https://githu\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44272",
-    "created_at": "2026-02-25T03:52:04Z",
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44515",
+    "created_at": "2026-03-07T18:16:35Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44272/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44272",
+    "files_url": "https://github.com/huggingface/transformers/pull/44515/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44515",
     "labels": [],
     "merged": true,
-    "number": 44272,
+    "number": 44515,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in convert_rope_params_to_dict when ignore_keys is a list",
-    "updated_at": "2026-02-25T14:38:36Z"
+    "title": "Fix failing `GPTNeoModelLanguageGenerationTest`",
+    "updated_at": "2026-03-09T14:11:21Z"
   },
   {
-    "additions": 1272,
-    "author": "balak4",
+    "additions": 1,
+    "author": "math-hiyoko",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Add GreedyLR, a metric-based adaptive learning rate scheduler that adjusts the learning rate during training based on the current loss - Based on [\"Dynamic Learning Rate Scheduling based on Loss Changes Leads to Faster Converg\u2026",
-    "changed_files": 10,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 69,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44271",
-    "created_at": "2026-02-25T01:40:57Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44513",
+    "created_at": "2026-03-07T16:11:55Z",
+    "deletions": 492,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44271/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44271",
+    "files_url": "https://github.com/huggingface/transformers/pull/44513/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44513",
     "labels": [],
     "merged": true,
-    "number": 44271,
-    "review_comments_count": 3,
+    "number": 44513,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add GreedyLR adaptive learning rate scheduler",
-    "updated_at": "2026-03-18T18:45:46Z"
+    "title": "Fix: Remove references to transformers run command",
+    "updated_at": "2026-03-09T15:37:16Z"
   },
   {
-    "additions": 88,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? A lot of ProcessorsKwargs have incorrect/unspecified type hints in their ProcessorsKwargs TypedDict for their images_kwargs attribute. Functionnaly, this did not cause issues as \"_merge_kwargs\" automatically picks u\u2026",
-    "changed_files": 44,
+    "additions": 4,
+    "author": "04cb",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44355. The inspect.getsource() call raises TypeError when running compiled Python files with Cython-compiled functions. Added try-except block to gracefully handle this case by returning a default indentation level of 4.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44270",
-    "created_at": "2026-02-25T00:11:31Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44511",
+    "created_at": "2026-03-07T05:36:25Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44270/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44270",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44511/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44511",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44270,
+    "number": 44511,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add correct typing to custom images_kwargs in ProcessorsKwargs",
-    "updated_at": "2026-02-25T01:12:06Z"
+    "state": "closed",
+    "title": "Fix get_docstring_indentation_level to handle compiled functions",
+    "updated_at": "2026-03-09T13:10:06Z"
   },
   {
-    "additions": 30,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This is a follow-up to https://github.com/huggingface/transformers/pull/43748, and will allow to have clickable links to the full modality kwargs when present in the docstring of a processor or image processor Cc @s\u2026",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "math-hiyoko",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 27,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44269",
-    "created_at": "2026-02-25T00:05:47Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44510",
+    "created_at": "2026-03-06T23:37:51Z",
+    "deletions": 358,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44269/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44269",
+    "files_url": "https://github.com/huggingface/transformers/pull/44510/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44510",
     "labels": [],
     "merged": true,
-    "number": 44269,
-    "review_comments_count": 0,
+    "number": 44510,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add `ProcessingKwargs` `ImagesKwargs` etc. to docs",
-    "updated_at": "2026-02-27T19:03:15Z"
-  },
+    "title": "Fix: Remove references to `text2text-generation`, `summarization` and `translation` pipeline tasks",
+    "updated_at": "2026-03-10T00:39:30Z"
+  },
   {
-    "additions": 5,
-    "author": "ethanknights",
+    "additions": 8,
+    "author": "KartikPawade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Some improvements to the `trainer.py` docs. ## Before submitting - [x] This PR fixes a typo or improves the docs. ## Who can review? Documentation: @stevhliu",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Older OwlViT checkpoints stored `position_ids` as buffers in the text and vision embedding modules. These tensors are simple integer ranges (0 \u2192 max sequence length) and are now recomputed dynamically during initial\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44268",
-    "created_at": "2026-02-24T23:20:16Z",
-    "deletions": 4,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44508",
+    "created_at": "2026-03-06T18:49:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44268/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44268",
+    "files_url": "https://github.com/huggingface/transformers/pull/44508/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44508",
     "labels": [],
     "merged": true,
-    "number": 44268,
+    "number": 44508,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: fixes in `Trainer` class docs (`compute_loss` & `hyperparameter_search`)",
-    "updated_at": "2026-02-26T00:50:23Z"
+    "title": "Fix unexpected `position_ids` keys when loading OwlViT models",
+    "updated_at": "2026-03-18T18:30:48Z"
   },
   {
     "additions": 4,
-    "author": "manavshrivastavagit",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Update the `DocumentQuestionAnsweringPipeline` docstring to explicitly mention the task summary in the Transformers documentation. - Remove the stale TODO comment now that document question answering is covered in the task sum\u2026",
+    "author": "0xDELUXA",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? `torch.distributed.fsdp` is not available in all PyTorch builds (for example, Windows ROCm). Importing it unconditionally at the top level causes an immediate crash with: ``` ModuleNotFoundError: No module named 'to\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44267",
-    "created_at": "2026-02-24T20:35:18Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44507",
+    "created_at": "2026-03-06T18:03:49Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44267/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44267",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44267,
+    "files_url": "https://github.com/huggingface/transformers/pull/44507/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44507",
+    "labels": [],
+    "merged": true,
+    "number": 44507,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs: point DocumentQuestionAnswering pipeline to task summary",
-    "updated_at": "2026-02-25T13:34:48Z"
+    "title": "Fix: Conditionally import `torch.distributed.fsdp` in `trainer_seq2seq.py`",
+    "updated_at": "2026-03-13T10:17:56Z"
   },
   {
-    "additions": 27,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 **Reasoning:** The impact of this fix goes beyond `Mask2Former` and `DeformableDetr` and should fix any model that uses `torch_compilable_check`. Most use\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The current implementation does not work with the `mps` device and TP. ## Example script script.py ``` import os os.environ[\"PYTORCH_ENABLE_MPS_FALLBACK\"] = \"1\" import torch from transformers import AutoModelForCaus\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44266",
-    "created_at": "2026-02-24T20:02:06Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44506",
+    "created_at": "2026-03-06T18:03:33Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44266/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44266",
+    "files_url": "https://github.com/huggingface/transformers/pull/44506/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44506",
     "labels": [],
     "merged": true,
-    "number": 44266,
+    "number": 44506,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(utils): Make torch_compilable_check compatible with torch.export strict mode",
-    "updated_at": "2026-02-26T09:42:47Z"
+    "title": "Tensor Parallelism and `mps` device",
+    "updated_at": "2026-03-11T15:16:49Z"
   },
   {
-    "additions": 90,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, WIP --> needs a test",
-    "changed_files": 36,
+    "additions": 16,
+    "author": "kushalkkb",
+    "author_association": "NONE",
+    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44264",
-    "created_at": "2026-02-24T18:06:58Z",
-    "deletions": 210,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44264/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44264",
-    "labels": [],
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44505",
+    "created_at": "2026-03-06T17:47:37Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44505/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44505",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44264,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0",
-    "updated_at": "2026-02-25T18:53:20Z"
+    "number": 44505,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Improve error handling in load_vocab for invalid vocabulary path",
+    "updated_at": "2026-03-10T04:14:31Z"
   },
   {
-    "additions": 5882,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI",
-    "changed_files": 18,
+    "additions": 13,
+    "author": "kushalkkb",
+    "author_association": "NONE",
+    "body_excerpt": "This PR improves error handling in the load_vocab function. Changes: - Added validation to ensure vocab_file is a string path - Added check for file existence - Raised clearer FileNotFoundError when vocabulary file is missing This improves\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44260",
-    "created_at": "2026-02-24T15:51:11Z",
-    "deletions": 6147,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44504",
+    "created_at": "2026-03-06T17:24:10Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44260/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44260",
+    "files_url": "https://github.com/huggingface/transformers/pull/44504/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44504",
     "labels": [],
-    "merged": true,
-    "number": 44260,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 44504,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update common tests Trainer",
-    "updated_at": "2026-02-27T17:31:59Z"
+    "title": "Improve error handling in load_vocab for invalid vocabulary path",
+    "updated_at": "2026-03-06T17:46:17Z"
   },
   {
-    "additions": 1830,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. <!-- Congratulations! You've made it this f\u2026",
-    "changed_files": 6,
+    "additions": 8,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **Moonshine:** In [MoonshineEncoder.forward](https://github.com/huggingface/transformers/blob/main/src/transformers/models/moonshine/modular_moon\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44259",
-    "created_at": "2026-02-24T15:01:56Z",
-    "deletions": 59,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44259/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44259",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44503",
+    "created_at": "2026-03-06T17:08:00Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44503/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44503",
     "labels": [],
-    "merged": false,
-    "number": 44259,
+    "merged": true,
+    "number": 44503,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Async data producer",
-    "updated_at": "2026-02-26T19:57:43Z"
+    "state": "closed",
+    "title": "fix(testing): Fix MoonshineEncoder UnboundLocalError and Florence2VisionBackbone dtype mismatch",
+    "updated_at": "2026-03-09T18:06:17Z"
   },
   {
-    "additions": 8,
-    "author": "vasqu",
+    "additions": 1,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "The old one has a merge conflict and it was easier to just mirror into a new branch / PR Note that this only affects the local big tests that I used to run on the A100s locally; not to be run with the CI (too big)",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? As per the title. Introduced in https://github.com/huggingface/transformers/pull/44381, not sure why the CI passed",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44258",
-    "created_at": "2026-02-24T15:00:29Z",
-    "deletions": 8,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44502",
+    "created_at": "2026-03-06T17:03:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44258/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44258",
+    "files_url": "https://github.com/huggingface/transformers/pull/44502/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44502",
     "labels": [],
-    "merged": false,
-    "number": 44258,
+    "merged": true,
+    "number": 44502,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Ernie 4.5 VL Moe`] Change revision",
-    "updated_at": "2026-03-14T19:59:05Z"
+    "title": "Fix type checker",
+    "updated_at": "2026-03-06T17:09:37Z"
   },
   {
-    "additions": 3,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? When post training using context parallelism, some processes may have their chunk of the sample input masked out leading to a NaN loss for that process. Using `nanmean` allows us to keep the real loss that isn't `Na\u2026",
+    "additions": 1,
+    "author": "frogNotToad",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Changes the word \"maximize\" to \"minimize\" in the docs Fixes #44492 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). ## Who can review? Anyon\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44257",
-    "created_at": "2026-02-24T14:56:42Z",
-    "deletions": 3,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44501",
+    "created_at": "2026-03-06T16:58:14Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44257/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44257",
+    "files_url": "https://github.com/huggingface/transformers/pull/44501/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44501",
     "labels": [],
-    "merged": false,
-    "number": 44257,
-    "review_comments_count": 5,
-    "state": "open",
-    "title": "use nanmean for aggregating loss",
-    "updated_at": "2026-02-25T17:01:08Z"
+    "merged": true,
+    "number": 44501,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fixed typo in docs/source/en/kv_cache.md",
+    "updated_at": "2026-03-06T20:05:36Z"
   },
   {
-    "additions": 10,
-    "author": "albertvillanova",
+    "additions": 18,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "Fix CLI NameError: name 'TypeAdapter' is not defined: - Do not evaluate type annotations in CLI serve ### Problem Calling the CLI raises NameError: > NameError: name 'TypeAdapter' is not defined ```bash transformers --help ``` ```python Tr\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? - Do proper type check in case jax is installed. - Make sure older torch versions don't raise typing issues",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44256",
-    "created_at": "2026-02-24T14:54:49Z",
-    "deletions": 9,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44500",
+    "created_at": "2026-03-06T16:56:12Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44256/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44256",
+    "files_url": "https://github.com/huggingface/transformers/pull/44500/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44500",
     "labels": [],
     "merged": true,
-    "number": 44256,
+    "number": 44500,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix CLI NameError: name 'TypeAdapter' is not defined",
-    "updated_at": "2026-03-12T07:57:14Z"
+    "title": "Follow-up typing checking fixes",
+    "updated_at": "2026-03-09T10:47:31Z"
   },
   {
-    "additions": 404,
-    "author": "itazap",
+    "additions": 11,
+    "author": "michaelbenayoun",
     "author_association": "MEMBER",
-    "body_excerpt": "## What this PR does Given he different issues that were noticed by @hmellor on vLLM, we wanted to make sure we did not end up with crazy breaks. We ran a full test suite (code can be found in #44298) and the results showed 22 model conver\u2026",
-    "changed_files": 24,
+    "body_excerpt": "# What does this PR do? This PR is just a small cleanup. The `TensorParallelLayer` class defines `_prepare_input_fn` and `_prepare_output_fn` as static methods. But then these methods end-up being instance or static methods in the sub clas\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44255",
-    "created_at": "2026-02-24T14:17:00Z",
-    "deletions": 205,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44499",
+    "created_at": "2026-03-06T16:46:18Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44255/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44255",
+    "files_url": "https://github.com/huggingface/transformers/pull/44499/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44499",
     "labels": [],
     "merged": true,
-    "number": 44255,
-    "review_comments_count": 25,
+    "number": 44499,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[vllm + v5 fix] handle TokenizersBackend fallback properly for v5",
-    "updated_at": "2026-03-23T11:07:37Z"
+    "title": "Make `_prepare_input_fn` and `_prepare_output_fn` instance methods",
+    "updated_at": "2026-03-10T13:53:18Z"
   },
   {
-    "additions": 16,
-    "author": "mario-sanz",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully refle\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add the `neuron` backend for initialization in TP.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44254",
-    "created_at": "2026-02-24T13:54:30Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44498",
+    "created_at": "2026-03-06T16:23:18Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44254/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44254",
+    "files_url": "https://github.com/huggingface/transformers/pull/44498/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44498",
     "labels": [],
-    "merged": false,
-    "number": 44254,
-    "review_comments_count": 5,
+    "merged": true,
+    "number": 44498,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix fast tokenizers overwriting custom `pre_tokenizer` from `tokenizer.json`",
-    "updated_at": "2026-02-26T08:45:56Z"
+    "title": "feat: add neuron in tensor parallelism initialization",
+    "updated_at": "2026-03-12T18:07:52Z"
   },
   {
-    "additions": 9,
-    "author": "Rocketknight1",
+    "additions": 43,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "`create_import_structure_from_path` does some redundant `os` calls, so I'm experimenting with changes to see if we can speed up loading a lot. Related to #44246",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/44466 and avoid issues with torch `.bin` checkpoints which always contain both keys!",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44253",
-    "created_at": "2026-02-24T13:03:40Z",
-    "deletions": 15,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44497",
+    "created_at": "2026-03-06T16:21:14Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44253/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44253",
+    "files_url": "https://github.com/huggingface/transformers/pull/44497/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44497",
     "labels": [],
     "merged": true,
-    "number": 44253,
+    "number": 44497,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Speed create_import_structure up with os.scandir()",
-    "updated_at": "2026-03-10T12:49:42Z"
+    "title": "[tie weights] \ud83d\udea8 If both weights are present with same weights, still tie them",
+    "updated_at": "2026-03-09T15:00:25Z"
   },
   {
-    "additions": 718,
-    "author": "zucchini-nlp",
+    "additions": 69,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Deprecate timm backbone in favor of keeping all models within one `timm` folder, similar to other vision models. A backbone is just a variation of `PreTrainedModel`",
-    "changed_files": 61,
+    "body_excerpt": "As per title, WIP",
+    "changed_files": 354,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44252",
-    "created_at": "2026-02-24T13:00:59Z",
-    "deletions": 772,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44252/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44252",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44495",
+    "created_at": "2026-03-06T13:57:04Z",
+    "deletions": 521,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44495/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44495",
     "labels": [],
     "merged": false,
-    "number": 44252,
-    "review_comments_count": 9,
+    "number": 44495,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Timm unification continued",
-    "updated_at": "2026-02-26T13:35:44Z"
+    "title": "[`Gradient Ckpting`] Remove unnecessary attribute definitions",
+    "updated_at": "2026-03-06T13:58:22Z"
   },
   {
-    "additions": 1951,
-    "author": "Sai-Suraj-27",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Model Page: https://huggingface.co/jinaai/jina-embeddings-v3 Model Paper: https://huggingface.co/papers/2409.10173 Downloads last month > **5.3M** Completes Part of https://github.com/huggingface/transformers/issues\u2026",
-    "changed_files": 13,
+    "additions": 13,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "- updates `ty` to `0.2.0` - pinned regex package (older versions did not have typing stubs) - fixed a couple of typing failures that went through via other parallel branches",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 29,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44251",
-    "created_at": "2026-02-24T12:56:24Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44494",
+    "created_at": "2026-03-06T12:57:25Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44251/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44251",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44494/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44494",
+    "labels": [],
     "merged": true,
-    "number": 44251,
-    "review_comments_count": 74,
+    "number": 44494,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Add `Jina-Embeddings-V3` Model",
-    "updated_at": "2026-03-19T10:07:57Z"
+    "title": "Update `ty` to 0.0.20",
+    "updated_at": "2026-03-06T13:30:25Z"
   },
   {
-    "additions": 5,
+    "additions": 439,
     "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/pull/43806#discussion_r2834269455. We removed `self.report_to == \"all\"` functionality by mistake. Adding it back !",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Since I removed some folders (fsdp, deepspeed) related to training, I need to modify the workflows !",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44250",
-    "created_at": "2026-02-24T12:38:21Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44491",
+    "created_at": "2026-03-06T11:15:42Z",
+    "deletions": 647,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44250/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44250",
+    "files_url": "https://github.com/huggingface/transformers/pull/44491/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44491",
     "labels": [],
     "merged": true,
-    "number": 44250,
-    "review_comments_count": 0,
+    "number": 44491,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "fix regression report_to \"all\"",
-    "updated_at": "2026-02-24T12:55:06Z"
+    "title": "Fix training ci and clean some tests",
+    "updated_at": "2026-03-11T16:27:57Z"
   },
   {
-    "additions": 9,
-    "author": "Ryan-J-MAX",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR fix? This PR adds backward compatibility for the deprecated `grouped_entities` parameter in the `TokenClassificationPipeline`. ## Problem The `grouped_entities` parameter was deprecated in favor of `aggregation_strateg\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44249",
-    "created_at": "2026-02-24T10:48:54Z",
+    "additions": 4,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ArthurZucker @Cyrilvallez pls help review, thx! This PR fixes failed test case: `pytest -rA tests/models/eurobert/test_modeling_eurobert.py::EuroBertModelTest::test_model_parallelism`",
+    "changed_files": 2,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44490",
+    "created_at": "2026-03-06T10:56:48Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44249/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44249",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44249,
+    "files_url": "https://github.com/huggingface/transformers/pull/44490/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44490",
+    "labels": [],
+    "merged": true,
+    "number": 44490,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add backward compatibility for grouped_entities parameter",
-    "updated_at": "2026-02-24T12:31:26Z"
+    "title": "fix model parallelism bug for eurobert model",
+    "updated_at": "2026-04-01T08:25:28Z"
   },
   {
-    "additions": 12,
-    "author": "yonigozlan",
+    "additions": 310,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix backward compatibility with remote code for old processors not defining valid_kwargs (e.g. phi4) Cc @zucchini-nlp Fix `test_processor_override` for phi3v and phi4 in vllm @hmellor",
-    "changed_files": 1,
+    "body_excerpt": "This PR makes `.ai` the single source of truth for agent templates and skills, and adds explicit `Makefile` targets to generate `Codex` and `Claude Code` specific artifacts. It contains a first skill aimed at properly dealing with typing e\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44245",
-    "created_at": "2026-02-23T21:47:19Z",
-    "deletions": 4,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44489",
+    "created_at": "2026-03-06T08:42:12Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44245/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44245",
+    "files_url": "https://github.com/huggingface/transformers/pull/44489/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44489",
     "labels": [],
     "merged": true,
-    "number": 44245,
-    "review_comments_count": 3,
+    "number": 44489,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix image processors `from_dict` backward compatibility with old remote code",
-    "updated_at": "2026-02-24T15:17:37Z"
+    "title": "Centralize AI agent templates in `.ai`",
+    "updated_at": "2026-03-18T14:17:22Z"
   },
   {
-    "additions": 63,
-    "author": "thakoreh",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44242 Load balancing loss was not being added when `output_router_logits=False` in Mixtral models. ## Changes - Fixed loss calculation to include load balancing even when router logits are not output - Added test case ##\u2026",
-    "changed_files": 2,
+    "additions": 482,
+    "author": "abhijeet-dhumal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44486 Adds `KubeflowCallback` to enable automatic progress and metrics reporting for training jobs running on [Kubeflow Trainer](https://github.com/kubeflow/trainer). When training runs inside a Kubeflow Trai\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44243",
-    "created_at": "2026-02-23T21:27:09Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44487",
+    "created_at": "2026-03-06T08:31:30Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44243/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44243",
+    "files_url": "https://github.com/huggingface/transformers/pull/44487/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44487",
     "labels": [],
-    "merged": false,
-    "number": 44243,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44487,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "fix: add load balancing loss when output_router_logits=False",
-    "updated_at": "2026-02-23T21:54:11Z"
+    "title": "feat(integration): Add KubeflowCallback to enable automatic progress \u2026",
+    "updated_at": "2026-03-18T14:58:23Z"
   },
   {
-    "additions": 9,
-    "author": "yushiran",
+    "additions": 691,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Adds missing `-> bool`, `-> int`, and `-> str | None` return type annotations to public utility functions in `utils/generic.py`, making them consistent with the newer `is_timm_config_dict` and `is_timm_local_checkpoint` function\u2026",
+    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44241",
-    "created_at": "2026-02-23T19:50:05Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44482",
+    "created_at": "2026-03-06T02:39:41Z",
+    "deletions": 332,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44241/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44241",
+    "files_url": "https://github.com/huggingface/transformers/pull/44482/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44482",
     "labels": [],
     "merged": true,
-    "number": 44241,
+    "number": 44482,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add missing return type annotations to type-checking utilities in generic.py",
-    "updated_at": "2026-02-24T13:27:11Z"
+    "title": "add XPU Expectations for higgs_audio_v2 tests",
+    "updated_at": "2026-03-10T08:38:56Z"
   },
   {
-    "additions": 2,
-    "author": "tarekziade",
+    "additions": 2353,
+    "author": "XingyuHu109",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR adds native Transformers support for DeepSeek-V3.2. It introduces a new `deepseek_v32` model family so the official checkpoints resolve through the standard auto classes without `trust_remote_code`. The implementation ke\u2026",
+    "changed_files": 19,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44481",
+    "created_at": "2026-03-05T21:14:38Z",
+    "deletions": 30,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44481/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44481",
+    "labels": [],
+    "merged": false,
+    "number": 44481,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Add native DeepSeek-V3.2 support",
+    "updated_at": "2026-03-12T16:02:46Z"
+  },
+  {
+    "additions": 3,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Makes sure `find_bad_commit` always return the result `dict`",
+    "body_excerpt": "# What does this PR do? add `diffusers` to docker file for `VibeVoice` (added in PR #40546).",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44240",
-    "created_at": "2026-02-23T19:12:49Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44480",
+    "created_at": "2026-03-05T20:54:07Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44240/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44240",
+    "files_url": "https://github.com/huggingface/transformers/pull/44480/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44480",
     "labels": [],
     "merged": true,
-    "number": 44240,
+    "number": 44480,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix return value - fixes #44238",
-    "updated_at": "2026-02-24T13:02:59Z"
+    "title": "Add `diffusers` to CI docker file",
+    "updated_at": "2026-03-05T21:11:17Z"
   },
   {
-    "additions": 253,
-    "author": "stevhliu",
+    "additions": 116,
+    "author": "BenjaminBossan",
     "author_association": "MEMBER",
-    "body_excerpt": "part 2 of refactoring the training docs adds new dedicated guide to callbacks and data collators todo: - [x] backlink to `## Next steps` in `trainer.md` once https://github.com/huggingface/transformers/pull/44185 is merged",
-    "changed_files": 7,
+    "body_excerpt": "Required fixes: - some code was using unordered data structures, making weight order random - adjust alpha to offset increased rank from fusion - import functions from PEFT if available See https://github.com/huggingface/peft/pull/3083.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44239",
-    "created_at": "2026-02-23T18:54:55Z",
-    "deletions": 47,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44478",
+    "created_at": "2026-03-05T17:19:31Z",
+    "deletions": 26,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44239/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44239",
+    "files_url": "https://github.com/huggingface/transformers/pull/44478/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44478",
     "labels": [],
     "merged": true,
-    "number": 44239,
-    "review_comments_count": 10,
+    "number": 44478,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[docs] callbacks and collators",
-    "updated_at": "2026-02-24T22:12:46Z"
+    "title": "[WIP] FIX Make Mixtral LoRA loading work",
+    "updated_at": "2026-03-11T17:44:20Z"
   },
   {
     "additions": 1,
-    "author": "eustlb",
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? small nit but will be misleading if not fixed",
+    "body_excerpt": "# What does this PR do? As per the title. It's quite a random rule to fix https://huggingface.co/fixie-ai/ultravox-v0_5-llama-3_2-1b to be honest",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44237",
-    "created_at": "2026-02-23T17:52:17Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44477",
+    "created_at": "2026-03-05T16:58:29Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44237/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44237",
+    "files_url": "https://github.com/huggingface/transformers/pull/44477/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44477",
     "labels": [],
-    "merged": true,
-    "number": 44237,
+    "merged": false,
+    "number": 44477,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[mimi] nit",
-    "updated_at": "2026-02-24T15:43:55Z"
+    "title": "[vllm compat] Fix remote code inits",
+    "updated_at": "2026-03-11T10:34:06Z"
   },
   {
-    "additions": 109,
-    "author": "SunMarc",
+    "additions": 4,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43847 When using zero3 + from_config, the model was incorrectly initialized as we were not gathering the params. Added a test also. cc @tohtana",
-    "changed_files": 5,
+    "body_excerpt": "I made an oversight in the fix at #43981 - I didn't realize the dim order changed for torch, so the test was still flaky for torch tensors. The fix reduced the flaky frequency a lot so I thought it had been fixed, but actually it's still t\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44236",
-    "created_at": "2026-02-23T17:20:01Z",
-    "deletions": 3,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44476",
+    "created_at": "2026-03-05T16:39:44Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44236/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44236",
+    "files_url": "https://github.com/huggingface/transformers/pull/44476/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44476",
     "labels": [],
     "merged": true,
-    "number": 44236,
+    "number": 44476,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix zero3 init config",
-    "updated_at": "2026-02-27T11:36:19Z"
+    "title": "Fix Llava tests for torch too!",
+    "updated_at": "2026-03-11T16:47:05Z"
   },
   {
     "additions": 1,
     "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "UPDATE TO: https://github.com/huggingface/transformers/pull/44179/changes Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
+    "body_excerpt": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44235",
-    "created_at": "2026-02-23T17:06:54Z",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44475",
+    "created_at": "2026-03-05T16:29:18Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44235/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44235",
+    "files_url": "https://github.com/huggingface/transformers/pull/44475/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44475",
     "labels": [],
     "merged": true,
-    "number": 44235,
+    "number": 44475,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "update fuyu tokenizer class",
-    "updated_at": "2026-02-23T17:36:22Z"
+    "title": "chameleon added to MODELS_WITH_INCORRECT_HUB_TOKENIZER_CLASS",
+    "updated_at": "2026-03-09T22:33:20Z"
   },
   {
-    "additions": 249,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Cc @zucchini-nlp",
-    "changed_files": 3,
+    "additions": 875,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix https://github.com/QwenLM/Qwen3.5/issues/58. In the latest code, Qwen3VL and Qwen3.5 use the same `get_rope_index` func of Qwen2VL. But they should be different since Qwen3VL/Qwen3.5 introduce text timestamps. T\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44234",
-    "created_at": "2026-02-23T17:03:05Z",
-    "deletions": 55,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44474",
+    "created_at": "2026-03-05T15:46:09Z",
+    "deletions": 107,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44234/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44234",
+    "files_url": "https://github.com/huggingface/transformers/pull/44474/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44474",
     "labels": [],
     "merged": true,
-    "number": 44234,
-    "review_comments_count": 2,
+    "number": 44474,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "Add processing tests for phi4 multimodal",
-    "updated_at": "2026-02-23T22:08:11Z"
+    "title": "[Bugfix] fix video inference of qwen3vl and qwen3.5 series",
+    "updated_at": "2026-03-10T09:52:44Z"
   },
   {
-    "additions": 219,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Extends `ty` coverage to `src/transformers/generation` - Added a dedicated type-check wrapper script: `utils/check_types.py`. - Updated `Makefile` to run `ty` checks through the wrapper in both `style` and `check-repo`. - merged all typing\u2026",
-    "changed_files": 15,
+    "additions": 137,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? supersedes #44446 on `main`, when loading to cpu and using meta devices for non-rank0 processes, it now re-initializes weights on those processes as well as uses more CPU memory. In testing with loading llama3-8b. m\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44233",
-    "created_at": "2026-02-23T16:23:24Z",
-    "deletions": 101,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44473",
+    "created_at": "2026-03-05T14:52:15Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44233/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44233",
+    "files_url": "https://github.com/huggingface/transformers/pull/44473/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44473",
     "labels": [],
     "merged": true,
-    "number": 44233,
-    "review_comments_count": 33,
+    "number": 44473,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "chore(typing): Add type checking to `src/transformers/generation`",
-    "updated_at": "2026-03-04T17:24:37Z"
+    "title": "fix FSDP loading with meta devices",
+    "updated_at": "2026-03-09T15:46:22Z"
   },
   {
-    "additions": 11,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? per https://code.claude.com/docs/en/claude-code-on-the-web#best-practices `CLAUDE.md` can alias directly into `AGENTS.md`",
-    "changed_files": 2,
+    "additions": 13,
+    "author": "jblox26",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this fix? Running video inference with any `Qwen3VL` model raises `StopIteration` during `model.generate()`: ``` File \".../transformers/models/qwen3_vl/modeling_qwen3_vl.py\", line 1126, in get_rope_index grid_thw = next(grid_i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44232",
-    "created_at": "2026-02-23T16:10:15Z",
-    "deletions": 109,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44472",
+    "created_at": "2026-03-05T14:50:06Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44232/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44232",
+    "files_url": "https://github.com/huggingface/transformers/pull/44472/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44472",
     "labels": [],
-    "merged": true,
-    "number": 44232,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44472,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "chore: added CLAUDE.md alias",
-    "updated_at": "2026-02-24T14:48:36Z"
+    "title": "Fix Qwen3VL get_rope_index StopIteration with per-frame video tokens",
+    "updated_at": "2026-03-06T15:15:58Z"
   },
   {
-    "additions": 413,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "additions": 50,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44466 After `.to(device)`, PyTorch's `Module._apply` may create new `Parameter` objects that no longer share storage with tied weights. This caused `remove_tied_weights_from_state_dict` to fail to detect and\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44231",
-    "created_at": "2026-02-23T15:45:47Z",
-    "deletions": 578,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44471",
+    "created_at": "2026-03-05T14:30:17Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44231/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44231",
-    "labels": [],
-    "merged": true,
-    "number": 44231,
-    "review_comments_count": 18,
+    "files_url": "https://github.com/huggingface/transformers/pull/44471/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44471",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44471,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Performance] FP8 Grouped and Batched Matmuls",
-    "updated_at": "2026-03-11T08:51:02Z"
+    "title": "Fix tied weights serialization being device-dependent",
+    "updated_at": "2026-03-06T14:03:18Z"
   },
   {
-    "additions": 4,
-    "author": "alvarobartt",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR adds the missing backtick (`) on the `AnyToAnyPipeline.__call__` docstrings, as those were showing as in the screenshot below instead. <img width=\"1023\" height=\"400\" alt=\"image\" src=\"https://github.com/user-\u2026",
-    "changed_files": 1,
+    "additions": 8,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44360 The reference `fp8_index` kernel clamps per-head q\u00b7k scores with `T.max(logits, 0)` before the weighted sum across heads ([kernel.py#L241](https://huggingface.co/deepseek-ai/DeepSeek-V3.2-Exp/blob/main/inference/kernel.py#L241\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44229",
-    "created_at": "2026-02-23T15:25:47Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44470",
+    "created_at": "2026-03-05T14:02:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44229/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44229",
-    "labels": [],
-    "merged": true,
-    "number": 44229,
+    "files_url": "https://github.com/huggingface/transformers/pull/44470/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44470",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44470,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing backtick in `AnyToAnyPipeline.__call__` docstring",
-    "updated_at": "2026-02-23T19:21:08Z"
+    "title": "Add missing ReLU in GlmMoeDsaIndexer",
+    "updated_at": "2026-03-05T15:39:38Z"
   },
   {
-    "additions": 35,
-    "author": "JonoLF",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 4,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? For remote code that behave correctly with tied weights, we need to keep the same behavior as for the main lib, i.e. not remove them from tied weights (as tied weights are marked as missing to avoid inits!!)",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44228",
-    "created_at": "2026-02-23T15:09:05Z",
-    "deletions": 7,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44469",
+    "created_at": "2026-03-05T13:51:55Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44228/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44228",
+    "files_url": "https://github.com/huggingface/transformers/pull/44469/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44469",
     "labels": [],
-    "merged": false,
-    "number": 44228,
+    "merged": true,
+    "number": 44469,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[Quantisation] account for nested tensors from quantisers",
-    "updated_at": "2026-03-17T11:57:53Z"
+    "state": "closed",
+    "title": "[remote code/vllm] Fix incorrect tied weights",
+    "updated_at": "2026-03-05T15:07:56Z"
   },
   {
-    "additions": 21,
-    "author": "remi-or",
+    "additions": 13,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR adds a logging message when infering the behavior of use async and fixes an error when evicting a graph from the graph buffer.",
-    "changed_files": 4,
+    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have added_tokens_decoder with specific token_ids, we need to overwrite them in spm model ! example: [UNUSED_TOKEN_146] -> <|im_start|> see internlm2: https://huggingfac\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44227",
-    "created_at": "2026-02-23T14:53:53Z",
-    "deletions": 13,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44468",
+    "created_at": "2026-03-05T13:48:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44227/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44227",
+    "files_url": "https://github.com/huggingface/transformers/pull/44468/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44468",
     "labels": [],
     "merged": true,
-    "number": 44227,
-    "review_comments_count": 9,
+    "number": 44468,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Small fixes",
-    "updated_at": "2026-03-03T13:40:10Z"
+    "title": "Replace placeholder tokens as specified in added_tokens_decoder",
+    "updated_at": "2026-03-05T16:29:13Z"
   },
   {
-    "additions": 86,
-    "author": "Cyrilvallez",
+    "additions": 346,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. It looks like some models (xlnet and kosmos2_5) and most audio models sometimes rely on the full previous input_ids to prepare inputs. Note that this cannot be compatible with restarting generation\u2026",
-    "changed_files": 12,
+    "body_excerpt": "Replace placeholder tokens as specified in added_tokens_decoder if we have `added_tokens_decoder` with specific token_ids, we need to overwrite them in spm model ! `example: [UNUSED_TOKEN_146] -> <|im_start|>` see internlm2: https://huggin\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44226",
-    "created_at": "2026-02-23T13:27:23Z",
-    "deletions": 66,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44467",
+    "created_at": "2026-03-05T13:44:54Z",
+    "deletions": 204,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44226/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44226",
+    "files_url": "https://github.com/huggingface/transformers/pull/44467/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44467",
     "labels": [],
-    "merged": true,
-    "number": 44226,
-    "review_comments_count": 17,
-    "state": "closed",
-    "title": "[generate] Always pass full input_ids in `prepare_inputs_for_generation`",
-    "updated_at": "2026-02-24T10:45:49Z"
+    "merged": false,
+    "number": 44467,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Placeholder tokens update",
+    "updated_at": "2026-03-05T13:47:28Z"
   },
   {
-    "additions": 169,
-    "author": "zucchini-nlp",
+    "additions": 20,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, we weren't running these test for some time because they were being filtered into `non-model` tests. But `non-model` doesn't run tests that are marked as generation, so I moved it back to `generation`\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Fix the loss calculation; we should calculate it on scaled targets. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44225",
-    "created_at": "2026-02-23T12:09:40Z",
-    "deletions": 270,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44465",
+    "created_at": "2026-03-05T12:59:23Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44225/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44225",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44465/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44465",
+    "labels": [
+      "bug"
+    ],
     "merged": true,
-    "number": 44225,
-    "review_comments_count": 18,
+    "number": 44465,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix generation integration tests",
-    "updated_at": "2026-02-25T09:19:39Z"
+    "title": "[timesfm2_5] fix loss scaling",
+    "updated_at": "2026-03-05T14:50:26Z"
   },
   {
-    "additions": 5,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Small fixes after https://github.com/huggingface/transformers/pull/44130. See https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/8785954cca2fdca181de0b9567059471bcadb959/2026-02-21/ci_resu\u2026",
-    "changed_files": 3,
+    "additions": 16,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44462 When a model's `model_type` (e.g. `\"llama\"`) has no entry in `TOKENIZER_MAPPING_NAMES`, `AutoTokenizer.from_pretrained` falls through to loading the tokenizer class declared in `tokenizer_config.json`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44224",
-    "created_at": "2026-02-23T10:48:19Z",
-    "deletions": 8,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44463",
+    "created_at": "2026-03-05T12:45:57Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44224/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44224",
+    "files_url": "https://github.com/huggingface/transformers/pull/44463/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44463",
     "labels": [],
     "merged": false,
-    "number": 44224,
+    "number": 44463,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Small fixes",
-    "updated_at": "2026-02-24T10:06:14Z"
+    "title": "Fix AutoTokenizer ignoring tokenizer.json for unregistered model types",
+    "updated_at": "2026-03-07T13:50:44Z"
   },
   {
-    "additions": 1,
-    "author": "albertvillanova",
+    "additions": 12,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "Fix type of `TrainingArguments.logging_steps`. This PR makes a minor update to the `TrainingArguments` class, so `logging_steps` parameter accepts both integers and floats, rather than only floats. Note these are the expected types in the\u2026",
+    "body_excerpt": "# What does this PR do? for SP loss we do not have torch device mesh but rather a deepspeed only. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44223",
-    "created_at": "2026-02-23T08:50:04Z",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44461",
+    "created_at": "2026-03-05T11:39:02Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44223/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44223",
+    "files_url": "https://github.com/huggingface/transformers/pull/44461/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44461",
     "labels": [],
-    "merged": false,
-    "number": 44223,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44461,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix type of TrainingArguments.logging_steps",
-    "updated_at": "2026-02-23T09:08:18Z"
+    "title": "[Trainer] fix SP loss",
+    "updated_at": "2026-03-05T13:00:40Z"
   },
   {
     "additions": 1,
-    "author": "matisgagneux21",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - fix a typo in the Italian add-new-model guide: `docstirng` -> `docstring`. ## Why Small docs quality fix that avoids confusion for readers following the contribution guide.",
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44219",
-    "created_at": "2026-02-23T00:43:59Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44460",
+    "created_at": "2026-03-05T10:53:07Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44219/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44219",
+    "files_url": "https://github.com/huggingface/transformers/pull/44460/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44460",
     "labels": [],
     "merged": true,
-    "number": 44219,
+    "number": 44460,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs(it): fix typo in docstring wording",
-    "updated_at": "2026-02-23T15:04:51Z"
+    "title": "trigger tensor parallel utils test in the CI",
+    "updated_at": "2026-03-05T11:25:51Z"
   },
   {
-    "additions": 1,
-    "author": "matisgagneux21",
+    "additions": 95,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - fix a typo in the Italian migration guide install command: - `stentencepiece` -> `sentencepiece` ## Why The current command fails if copied as-is. This makes the installation snippet runnable for users reading the Italian docs.",
-    "changed_files": 1,
+    "body_excerpt": "## Summary Fixes #44458 PR #42848 introduced a regression where `torch.compile` on `MllamaForConditionalGeneration` fails with a C++ compile error from the torch inductor backend (`'tmp2' was not declared in this scope`). The root cause is\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44218",
-    "created_at": "2026-02-23T00:32:49Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44459",
+    "created_at": "2026-03-05T07:58:28Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44218/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44218",
+    "files_url": "https://github.com/huggingface/transformers/pull/44459/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44459",
     "labels": [],
-    "merged": true,
-    "number": 44218,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 44459,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Docs(it): fix typo in sentencepiece install command",
-    "updated_at": "2026-02-23T15:05:17Z"
+    "title": "fix: make Mllama cross attention mask compatible with torch.compile",
+    "updated_at": "2026-03-07T13:50:40Z"
   },
   {
     "additions": 1,
-    "author": "matisgagneux21",
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Update the Italian migration guide to avoid pointing users to the deprecated `grouped_entities` flag. - Clarify that `aggregation_strategy` is the current option (with a note that it was previously `grouped_entities`). ## Why\u2026",
+    "body_excerpt": "# What does this PR do? Fixes this failing [DepthProModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453624#step:14:4893). <img width=\"2231\" height=\"99\" alt=\"image\" src=\"https://github.com\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44217",
-    "created_at": "2026-02-23T00:10:48Z",
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44456",
+    "created_at": "2026-03-05T06:01:06Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44217/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44217",
+    "files_url": "https://github.com/huggingface/transformers/pull/44456/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44456",
     "labels": [],
-    "merged": false,
-    "number": 44217,
+    "merged": true,
+    "number": 44456,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs(it): update deprecated grouped_entities reference",
-    "updated_at": "2026-02-23T14:56:19Z"
+    "title": "Fix failing `DepthProModelIntegrationTest`",
+    "updated_at": "2026-03-05T14:52:40Z"
   },
   {
-    "additions": 13,
-    "author": "nikste",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Automated fix for #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook Fixes #44016 ## What does this PR do? This PR addresses issue #44016 by implementing the fix described in the issue. ---\u2026",
+    "additions": 3,
+    "author": "jiqing-feng",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Check if accelerator exists before using `pin_memory`. reproduce it on a CPU only node: `python examples/pytorch/continuous_batching_simple.py` output: ``` File \"/home/jiqingfe/transformers/src/transformers/generation/continuous_batching/i\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44216",
-    "created_at": "2026-02-22T23:40:56Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44455",
+    "created_at": "2026-03-05T05:20:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44216/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44216",
+    "files_url": "https://github.com/huggingface/transformers/pull/44455/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44455",
     "labels": [],
-    "merged": false,
-    "number": 44216,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44455,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook",
-    "updated_at": "2026-02-22T23:50:57Z"
-  },
-  {
-    "additions": 187,
-    "author": "jmriosal",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? Add sequence classification capabilities to the family of Granite models (Granite, GraniteMoe, GraniteMoeHybrid, and GraniteMoeShared). Fixes #44214, #35720 ## Why The Granite models currently only have the base mod\u2026",
-    "changed_files": 17,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44215",
-    "created_at": "2026-02-22T23:24:43Z",
-    "deletions": 13,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44215/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44215",
-    "labels": [],
-    "merged": false,
-    "number": 44215,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add sequence classification capability to Granite models",
-    "updated_at": "2026-02-24T20:39:37Z"
+    "title": "fix pin_memory for contiguous batching",
+    "updated_at": "2026-03-09T13:49:30Z"
   },
   {
-    "additions": 70,
-    "author": "parthchopra07",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR refreshes the BEiT model documentation to align it with the current Transformers vision docs style and features. It updates the usage examples, clarifies configuration details, and improves the resources sec\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44213",
-    "created_at": "2026-02-22T18:32:16Z",
-    "deletions": 29,
+    "additions": 17,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Fixes the `_init_weights` method in `PegasusPreTrainedModel` and `MarianPreTrainedModel` to handle sinusoidal position embeddings before calling `super()._init_weights(module)`, preventing the generic `nn.Embedding` branch fro\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-44053-8",
+    "cluster_ids": [
+      "cluster-44053-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44454",
+    "created_at": "2026-03-05T03:51:38Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44213/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44213",
+    "files_url": "https://github.com/huggingface/transformers/pull/44454/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44454",
     "labels": [],
     "merged": false,
-    "number": 44213,
+    "number": 44454,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update BEiT model card",
-    "updated_at": "2026-02-28T14:33:57Z"
+    "title": "Fix Pegasus sinusoidal position embedding init regression in v5",
+    "updated_at": "2026-03-09T02:17:41Z"
   },
   {
     "additions": 1,
-    "author": "alexandercarruthers",
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Updates a broken link in the summarization guide. https://huggingface.co/docs/transformers/tasks/summarization https://huggingface.co/billsum/datasets results in a 404. New URL is https://huggingface.co/datasets/Fis\u2026",
+    "body_excerpt": "## Fix KeyError in `convert_to_native_format` for dict vocab Fixes #44451 ### Problem `AutoTokenizer.from_pretrained(\"vesteinn/ScandiBERT\")` raises `KeyError: 0` in `convert_to_native_format`. ScandiBERT's `tokenizer_config.json` specifies\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44212",
-    "created_at": "2026-02-22T18:02:43Z",
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44452",
+    "created_at": "2026-03-05T03:34:02Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44212/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44212",
+    "files_url": "https://github.com/huggingface/transformers/pull/44452/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44452",
     "labels": [],
     "merged": true,
-    "number": 44212,
+    "number": 44452,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update 404ing BillSum dataset URL on Summarization Task guide",
-    "updated_at": "2026-02-23T14:46:11Z"
+    "title": "Fix KeyError in convert_to_native_format for dict vocab",
+    "updated_at": "2026-03-19T13:59:23Z"
   },
   {
-    "additions": 10,
-    "author": "ManasVardhan",
+    "additions": 297,
+    "author": "sandesh-bhandari-dev",
     "author_association": "NONE",
-    "body_excerpt": "## Fix for #44016 The `grouped_entities` parameter in `TokenClassificationPipeline._sanitize_parameters` was removed without a deprecation period, causing a `TypeError` when users pass `grouped_entities=True` to the `pipeline()` call (as s\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 32,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44211",
-    "created_at": "2026-02-22T17:04:50Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44449",
+    "created_at": "2026-03-05T01:40:47Z",
+    "deletions": 319,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44211/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44211",
+    "files_url": "https://github.com/huggingface/transformers/pull/44449/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44449",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44211,
+    "number": 44449,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add backward compatibility for deprecated grouped_entities parameter",
-    "updated_at": "2026-02-23T16:26:02Z"
+    "title": "fix: 3 bugs : MoE aux loss, ANSI TTY leak, pipeline removed and also task error",
+    "updated_at": "2026-03-05T13:22:40Z"
   },
   {
-    "additions": 1,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #44206 ## Problem PR #43769 (\"Add Voxtral Realtime\") added a `center` parameter to `LasrFeatureExtractor.__call__()` and passed it to `_torch_extract_fbank_features()`, but that method does not accept it. This causes a `TypeError` on\u2026",
+    "additions": 8,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes MiniCPM-o-2_6 related tests failures in vLLM, and improve backward compatibility with remote code in general. Cc @hmellor @zucchini-nlp",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44210",
-    "created_at": "2026-02-22T16:06:16Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44210/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44210",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44447",
+    "created_at": "2026-03-04T21:55:16Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44447/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44447",
     "labels": [],
-    "merged": false,
-    "number": 44210,
+    "merged": true,
+    "number": 44447,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(lasr): remove spurious center arg from _torch_extract_fbank_features call",
-    "updated_at": "2026-02-23T14:32:41Z"
+    "title": "[vLLM] Fix backward compatibility with hardcoded subprocessors classes in processors",
+    "updated_at": "2026-03-05T16:07:28Z"
   },
   {
-    "additions": 197,
-    "author": "paipeline",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? This PR fixes a critical bug in LayoutLMv2Tokenizer where passing `word_labels` for NER token classification tasks would crash with `AttributeError`. The issue was that `word_ids` and `sequence_ids` were being acce\u2026",
-    "changed_files": 3,
+    "additions": 4,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes fsdp loading for rank!=0 as they should stay on meta device. This was reverted in a PR that I can find anymore.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44209",
-    "created_at": "2026-02-22T14:37:25Z",
-    "deletions": 3,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44446",
+    "created_at": "2026-03-04T21:38:00Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44209/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44209",
+    "files_url": "https://github.com/huggingface/transformers/pull/44446/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44446",
     "labels": [],
     "merged": false,
-    "number": 44209,
+    "number": 44446,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix LayoutLMv2Tokenizer NER crashes with word_labels",
-    "updated_at": "2026-02-23T10:30:26Z"
+    "title": "Fix fdsp loading meta device",
+    "updated_at": "2026-03-05T15:07:57Z"
   },
   {
-    "additions": 1,
-    "author": "ainergiz",
+    "additions": 2282,
+    "author": "gabe-l-hart",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes a LASR regression introduced in `#43769` (released in `v5.2.0`). `LasrFeatureExtractor.__call__` passes `center` into `_torch_extract_fbank_features(...)`, but `_torch_extract_fbank_features` did not a\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR adds support for the forthcoming Granite Docling model based on the Granite 4 LLM architecture (`GraniteMoeHybrid`). ## Draft Status This PR is in draft pending the possibility of some additional changes: -\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44207",
-    "created_at": "2026-02-21T20:56:49Z",
-    "deletions": 70,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44207/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44207",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44445",
+    "created_at": "2026-03-04T20:54:17Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44445/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44445",
     "labels": [],
-    "merged": true,
-    "number": 44207,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix LASR feature extractor regression from invalid center argument",
-    "updated_at": "2026-02-23T10:01:35Z"
+    "merged": false,
+    "number": 44445,
+    "review_comments_count": 44,
+    "state": "open",
+    "title": "Adding support for GraniteDoclingHybrid",
+    "updated_at": "2026-03-11T19:19:44Z"
   },
   {
-    "additions": 1,
-    "author": "nightcityblade",
+    "additions": 7,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #44186 `LayoutLMv2Tokenizer.__init__` passes `only_label_first_subword` to `super().__init__()` but never stores it as `self.only_label_first_subword`. This causes an `AttributeError` when `word_labels` is pa\u2026",
+    "body_excerpt": "### What does this PR do? The following failing use case was identified and fixed in this PR: \u2192 [TOKENIZER_MAPPING_NAMES](https://github.com/huggingface/transformers/blob/main/src/transformers/models/auto/tokenization_auto.py#L63-L338) doe\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44204",
-    "created_at": "2026-02-21T16:06:46Z",
-    "deletions": 0,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44443",
+    "created_at": "2026-03-04T20:01:55Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44204/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44204",
+    "files_url": "https://github.com/huggingface/transformers/pull/44443/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44443",
     "labels": [],
-    "merged": false,
-    "number": 44204,
+    "merged": true,
+    "number": 44443,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(layoutlmv2): store only_label_first_subword attribute in tokenizer",
-    "updated_at": "2026-02-23T10:30:19Z"
+    "title": "fix(tokenizer): Only strip Fast from class names in AutoTokenizer if used as a suffix",
+    "updated_at": "2026-03-09T15:03:49Z"
   },
   {
-    "additions": 22,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #44075 `_get_sgd()`, `_get_adagrad()`, and `_get_rmsprop()` in `trainer_optimizer.py` only returned `ctx.optimizer_kwargs` (which contains just `lr`), completely ignoring `ctx.optim_args`. This meant that parameters specif\u2026",
-    "changed_files": 1,
+    "additions": 37,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes the training of LW-DETR. It turned out that the model was not able to overfit a single batch. Hence I asked Codex to investigate this. It turns out there were 3 bugs: 1. A logits calibration gap, cause\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44203",
-    "created_at": "2026-02-21T15:12:17Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44441",
+    "created_at": "2026-03-04T19:54:20Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44203/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44203",
+    "files_url": "https://github.com/huggingface/transformers/pull/44441/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44441",
     "labels": [],
     "merged": true,
-    "number": 44203,
-    "review_comments_count": 0,
+    "number": 44441,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix(trainer): pass optim_args to SGD, Adagrad, and RMSprop optimizers",
-    "updated_at": "2026-02-25T16:04:20Z"
+    "title": "[LW-DETR] Fix training",
+    "updated_at": "2026-03-09T15:36:02Z"
   },
   {
-    "additions": 63,
-    "author": "GS-GOAT",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? When `torch.compile` is used, [_ignore_bidirectional_mask_sdpa](cci:1://file:///c:/Users/BIT/Desktop/proj/gitrepo_clones/transformers/src/transformers/masking_utils.py:303:0-338:16) behaves differently than in eager\u2026",
+    "additions": 11,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes the noisy `HfHubHTTPError` exception output that appears when loading a transformer model from a repository that has discussions disabled. ### Root cause The `previous_pr()` function in `safetensors_conversio\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44202",
-    "created_at": "2026-02-21T13:45:15Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44440",
+    "created_at": "2026-03-04T18:31:13Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44202/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44202",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44440/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44440",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44202,
+    "number": 44440,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: bidirectional mask skip when attention dropout is active (#44188)",
-    "updated_at": "2026-03-09T10:31:41Z"
+    "title": "fix: catch HfHubHTTPError in safetensors auto_conversion thread",
+    "updated_at": "2026-03-05T15:39:11Z"
   },
   {
-    "additions": 18,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? main is currently failing with ``` FAILED tests/models/higgs_audio_v2/test_modeling_higgs_audio_v2.py::HiggsAudioV2ModelTest::test_generate_compilation_all_outputs - AssertionError: Lists differ: [torch.Size([2, 15,\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44201",
-    "created_at": "2026-02-21T10:03:41Z",
-    "deletions": 13,
+    "additions": 5,
+    "author": "Sai-Suraj-27",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes this failing [ProphetNetModelIntegrationTest](https://github.com/huggingface/transformers/actions/runs/22606636929/job/65500453490#step:14:2331). <img width=\"2303\" height=\"165\" alt=\"image\" src=\"https://github.\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43453-9",
+    "cluster_ids": [
+      "cluster-43453-9"
+    ],
+    "cluster_role": "member",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44439",
+    "created_at": "2026-03-04T16:55:50Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44201/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44201",
+    "files_url": "https://github.com/huggingface/transformers/pull/44439/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44439",
     "labels": [],
     "merged": true,
-    "number": 44201,
-    "review_comments_count": 3,
+    "number": 44439,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: HiggsAudioV2 cached decode inputs in compiled generation",
-    "updated_at": "2026-02-23T12:39:19Z"
+    "title": "Fix failing `ProphetNetModelIntegrationTest`",
+    "updated_at": "2026-03-05T15:43:59Z"
   },
   {
-    "additions": 3,
-    "author": "pragnyanramtha",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43782 The `weights_only` parameter passed to `from_pretrained()` was not being forwarded to `load_state_dict()` when loading `.bin` checkpoint files in the non-DeepSpeed code path. This caused `weights_only` to always default to `Tr\u2026",
-    "changed_files": 1,
+    "additions": 135,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds flashoptim from databricks team into Trainer ! cc @tomaarsen ### Results ``` Optimizer Loss Time Speed Memory \u0394 Mem \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 AdamW Fused 1.4\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44200",
-    "created_at": "2026-02-21T06:24:17Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44438",
+    "created_at": "2026-03-04T16:31:35Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44200/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44200",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44438/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44438",
+    "labels": [],
     "merged": false,
-    "number": 44200,
+    "number": 44438,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: propagate `weights_only` param to `load_state_dict` in .bin loading path (#43782)",
-    "updated_at": "2026-02-23T14:20:12Z"
+    "state": "open",
+    "title": "Add flashoptim",
+    "updated_at": "2026-04-02T13:06:17Z"
   },
   {
-    "additions": 3,
-    "author": "gowthamr-tech",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR fixes an issue in `run_image_classification_no_trainer.py` where the script always loaded `dataset_name` (e.g., CIFAR10) even when `--train_dir` or `--validation_dir` was provided. Now, when local dataset d\u2026",
-    "changed_files": 1,
+    "additions": 150,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44199",
-    "created_at": "2026-02-21T06:03:29Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44437",
+    "created_at": "2026-03-04T15:34:34Z",
+    "deletions": 134,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44199/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44199",
+    "files_url": "https://github.com/huggingface/transformers/pull/44437/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44437",
     "labels": [],
     "merged": true,
-    "number": 44199,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix local dataset loading priority in run_image_classification_no_tra\u2026",
-    "updated_at": "2026-02-24T15:10:17Z"
-  },
-  {
-    "additions": 71,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43975 ## Summary This PR fixes: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detokenizes in v5 ## Changes ``` src/transformers/tokenization_utils_tokenizers.py | 12 ++++- tests/models/llama/test_tokenization_llama.py | 60\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44198",
-    "created_at": "2026-02-21T04:54:47Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44198/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44198",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44198,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #43975: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detok",
-    "updated_at": "2026-02-23T14:10:47Z"
-  },
-  {
-    "additions": 37,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43937 ## Summary This PR fixes: [GLM-5] ValueError: GenerationConfig is invalid ## Changes ``` src/transformers/generation/configuration_utils.py | 13 +++++++++++- src/transformers/modeling_utils.py | 2 +- tests/generation/test_conf\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44197",
-    "created_at": "2026-02-21T04:47:32Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44197/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44197",
-    "labels": [],
-    "merged": false,
-    "number": 44197,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #43937: [GLM-5] ValueError: GenerationConfig is invalid",
-    "updated_at": "2026-02-23T09:42:54Z"
-  },
-  {
-    "additions": 12,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43881 ## Summary This PR fixes: glm-4v-9b loading failed ## Changes ``` src/transformers/configuration_utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) ``` ## Testing Please review the changes carefully. T\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44196",
-    "created_at": "2026-02-21T04:41:02Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44196/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44196",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44196,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #43881: glm-4v-9b loading failed",
-    "updated_at": "2026-02-23T09:45:03Z"
-  },
-  {
-    "additions": 2,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44062 ## Summary This PR fixes: TypeError: tokenizers.AddedToken() got multiple values for keyword argument 'special' ## Changes ``` src/transformers/tokenization_utils_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44195",
-    "created_at": "2026-02-21T04:38:14Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44195/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44195",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44195,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #44062: TypeError: tokenizers.AddedToken() got multiple values for k",
-    "updated_at": "2026-02-23T14:10:30Z"
-  },
-  {
-    "additions": 16,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44075 ## Summary This PR fixes: Optimizer SGD args are not used ## Changes ``` src/transformers/trainer_optimizer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ``` ## Testing Please review the changes carefully. The fix\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44194",
-    "created_at": "2026-02-21T04:35:53Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44194/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44194",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44194,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #44075: Optimizer SGD args are not used",
-    "updated_at": "2026-02-23T14:10:20Z"
-  },
-  {
-    "additions": 2,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #43986 ## Summary This PR fixes: Confusing crash when loading a video model through AutoProcessor without torchvision installed ## Changes ``` src/transformers/models/auto/video_processing_auto.py | 2 ++ 1 file changed, 2 insertions(\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44193",
-    "created_at": "2026-02-21T04:34:37Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44193/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44193",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44193,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #43986: Confusing crash when loading a video model through AutoProce",
-    "updated_at": "2026-02-23T09:46:15Z"
-  },
-  {
-    "additions": 3,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44079 ## Summary This PR fixes: `ModelOutput` keys aren't correctly assigned if key was previously None ## Changes ``` src/transformers/utils/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) ``` ## Testing Please r\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44192",
-    "created_at": "2026-02-21T04:33:52Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44192/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44192",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44192,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #44079: `ModelOutput` keys aren't correctly assigned if key was prev",
-    "updated_at": "2026-02-23T14:10:14Z"
-  },
-  {
-    "additions": 95,
-    "author": "danielalanbates",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44155 ## Summary This PR fixes: [AudioFlamingo3] Batched inference produces incorrect results due to embedding/token leak between tracks ## Changes ``` .../audioflamingo3/modeling_audioflamingo3.py | 51 +++++++++++++++++++--- .../au\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-44191-8",
-    "cluster_ids": [
-      "cluster-44191-8"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44191",
-    "created_at": "2026-02-21T04:32:30Z",
-    "deletions": 11,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44191/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44191",
-    "labels": [
-      "Audio"
-    ],
-    "merged": false,
-    "number": 44191,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix #44155: [AudioFlamingo3] Batched inference produces incorrect result",
-    "updated_at": "2026-03-19T16:16:17Z"
-  },
-  {
-    "additions": 3,
-    "author": "excepshenal",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Under fp16_full_eval or bf16_full_eval, still don't move model to device if using another dist train backend. This is causing bugs with FSDP2 + bf16_full_eval. The dist train backend would still be in charge of movi\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44189",
-    "created_at": "2026-02-21T00:06:16Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44189/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44189",
-    "labels": [],
-    "merged": false,
-    "number": 44189,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: don't move model to device under other dist train backends",
-    "updated_at": "2026-02-21T00:06:16Z"
-  },
-  {
-    "additions": 3,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 The NER/token classification issue and the downstream bug uncovered in the batched preprocessing use case with `LayoutLMv2Tokenizer`. \u2192 **Reasoning:** T\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44187",
-    "created_at": "2026-02-20T20:02:04Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44187/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44187",
-    "labels": [],
-    "merged": true,
-    "number": 44187,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(models): Fix LayoutLMv2 NER crash and broken batched truncation/padding",
-    "updated_at": "2026-02-23T10:30:51Z"
-  },
-  {
-    "additions": 361,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "part 1 of refactoring the `Trainer` docs - restructure the `toctree` a bit to accommodate new sections and docs - slim down `trainer.md` to be a clearer entry point (will expand the `## Next steps` section as we continue for better navigat\u2026",
-    "changed_files": 7,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44185",
-    "created_at": "2026-02-20T19:25:07Z",
-    "deletions": 578,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44185/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44185",
-    "labels": [],
-    "merged": true,
-    "number": 44185,
-    "review_comments_count": 19,
-    "state": "closed",
-    "title": "[docs] trainer part 1",
-    "updated_at": "2026-02-24T21:18:42Z"
-  },
-  {
-    "additions": 191,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR implements the initial architecture for CircuitGPT (based on OpenAI's research), as discussed in #44121. Key implementations: SparseLinear: Custom layer with Top-K weight sparsity logic. CircuitGpt Components: Attention, MLP, and C\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44184",
-    "created_at": "2026-02-20T16:58:27Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44184/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44184",
-    "labels": [],
-    "merged": false,
-    "number": 44184,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "feat: add OpenAI CircuitGPT core architecture and sparse linear layers",
-    "updated_at": "2026-02-20T17:18:44Z"
-  },
-  {
-    "additions": 1,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Our code has some references to the `grouped_entities` arg to the token classification pipeline, but this is no longer usable. This PR cleans them up entirely! Fixes #44016",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44182",
-    "created_at": "2026-02-20T15:28:26Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44182/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44182",
-    "labels": [],
-    "merged": true,
-    "number": 44182,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Remove refs to grouped_entities",
-    "updated_at": "2026-02-24T16:07:24Z"
-  },
-  {
-    "additions": 898,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title! Follow up of https://github.com/huggingface/transformers/pull/44130 and https://github.com/huggingface/transformers/pull/44226. Finally remove the `cache_position` everywhere (not ALL models, but a\u2026",
-    "changed_files": 169,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44181",
-    "created_at": "2026-02-20T15:24:39Z",
-    "deletions": 2698,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44181/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44181",
-    "labels": [],
-    "merged": true,
-    "number": 44181,
-    "review_comments_count": 32,
-    "state": "closed",
-    "title": "[core] \ud83d\udea8 Completely remove cache positions",
-    "updated_at": "2026-03-04T18:08:42Z"
-  },
-  {
-    "additions": 28,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes a flaky test in IdeficsForVisionText2TextTest::test_generate_continue_from_inputs_embeds. The flakiness can be reproduced with: ``` pytest -q -p no:rerunfailures --flake-finder --flake-runs=20 \\ tests/models/i\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44180",
-    "created_at": "2026-02-20T14:30:46Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44180/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44180",
-    "labels": [],
-    "merged": true,
-    "number": 44180,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(flaky): idefics generate cache flake",
-    "updated_at": "2026-02-26T16:18:18Z"
-  },
-  {
-    "additions": 27,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44179",
-    "created_at": "2026-02-20T13:51:44Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44179/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44179",
-    "labels": [],
-    "merged": true,
-    "number": 44179,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Models with incorrect tokenizer_class in tokenization_config.json tha\u2026",
-    "updated_at": "2026-02-23T08:33:13Z"
-  },
-  {
-    "additions": 2940,
-    "author": "ebezzam",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-opening https://github.com/huggingface/transformers/pull/37868 TODO - [x] recompute expected outputs - [x] passthrough code given new conventions - [x] check for unused code paths / configuration parameters Origi\u2026",
-    "changed_files": 27,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44178",
-    "created_at": "2026-02-20T12:36:21Z",
-    "deletions": 48,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44178/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44178",
-    "labels": [],
-    "merged": false,
-    "number": 44178,
-    "review_comments_count": 8,
-    "state": "open",
-    "title": "Add xcodec2 model",
-    "updated_at": "2026-03-19T12:08:27Z"
-  },
-  {
-    "additions": 41,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, spiritual successor to #44081 Why? Because as is - Only defaults for fa2/fa3, not on other requested kernels - Limits implementations to one kernel/implementation while I suspect that there will be multiple viable versions (i\u2026",
-    "changed_files": 7,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44177",
-    "created_at": "2026-02-20T12:13:30Z",
-    "deletions": 71,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44177/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44177",
-    "labels": [],
-    "merged": true,
-    "number": 44177,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[`Flash Attn`] Enable compatible implementations",
-    "updated_at": "2026-02-20T12:43:35Z"
-  },
-  {
-    "additions": 271,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Our kernel loading is incompatible with the original packages as they do not expose the same import structure: - Kernels seem to expose things in the init (and not in the original path) - Original packages seem to expose only within their\u2026",
-    "changed_files": 14,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44176",
-    "created_at": "2026-02-20T11:36:01Z",
-    "deletions": 124,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44176/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44176",
-    "labels": [],
-    "merged": true,
-    "number": 44176,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[`Mamba`] Fix kernel loading",
-    "updated_at": "2026-02-20T16:19:06Z"
-  },
-  {
-    "additions": 1,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC) for vllm: https://buildkite.com/vllm/ci/builds/52260/steps/canvas?sid=019c76ad-c8f2-4e59-a2f4-5f3b5bbc204c&tab=output",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44175",
-    "created_at": "2026-02-20T11:00:18Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44175/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44175",
-    "labels": [],
-    "merged": false,
-    "number": 44175,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC)",
-    "updated_at": "2026-02-20T16:19:31Z"
-  },
-  {
-    "additions": 1367,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "This draft expands `utils/check_modeling_structure.py` into a rule-driven linter for model code, with new checks and tests, while keeping runtime very low. Key features: - The checker is intentionally AST-only (no heavy imports/execution),\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44174",
-    "created_at": "2026-02-20T10:38:11Z",
-    "deletions": 24,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44174/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44174",
-    "labels": [],
-    "merged": true,
-    "number": 44174,
-    "review_comments_count": 38,
-    "state": "closed",
-    "title": "Expand model-structure lint rules with a fast AST-based, ruff-like framework",
-    "updated_at": "2026-03-12T06:42:21Z"
-  },
-  {
-    "additions": 20,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes flaky GLM OCR generation behavior when 2D `position_ids` are passed explicitly. Reproducible locally with: ``` pytest tests/models/glm_ocr/test_modeling_glm_ocr.py::GlmOcrModelTest::test_generate_with_and_without_position_ids --flake\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44173",
-    "created_at": "2026-02-20T09:28:48Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44173/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44173",
-    "labels": [],
-    "merged": true,
-    "number": 44173,
-    "review_comments_count": 10,
-    "state": "closed",
-    "title": "fix(flaky): `test_generate_with_and_without_position_ids` in GLM ORC",
-    "updated_at": "2026-02-20T19:06:19Z"
-  },
-  {
-    "additions": 2,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary I've noticed `test_synthidtext_watermark_processor_distributional_convergence_*` was our slowest tests in CircleCI This PR speeds up the slowest SynthID distributional convergence tests by optimizing a hot path in SynthIDTextWat\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44172",
-    "created_at": "2026-02-20T08:59:30Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44172/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44172",
-    "labels": [],
-    "merged": true,
-    "number": 44172,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "perf: Optimize SynthID logits processor batch index construction",
-    "updated_at": "2026-02-27T09:32:43Z"
-  },
-  {
-    "additions": 2396,
-    "author": "lmaksym",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds TDT decoder support for Parakeet ASR models, extending the existing CTC-only implementation. It incorporates the initial TDT integration work from [#41545](https://github.com/huggingface/transformers/pu\u2026",
-    "changed_files": 27,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44171",
-    "created_at": "2026-02-20T08:44:46Z",
-    "deletions": 289,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44171/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44171",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": false,
-    "number": 44171,
-    "review_comments_count": 64,
-    "state": "open",
-    "title": "Parakeet tdt",
-    "updated_at": "2026-03-26T09:05:08Z"
-  },
-  {
-    "additions": 74,
-    "author": "veeceey",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Adds `GitForCausalLM` to `MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES` so GIT can be used with the `visual-question-answering` pipeline - Filters tokenizer outputs in VQA pipeline `preprocess` to only pass keys accepted\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44170",
-    "created_at": "2026-02-20T08:28:05Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44170/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44170",
-    "labels": [],
-    "merged": false,
-    "number": 44170,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add GIT model support in VQA pipeline",
-    "updated_at": "2026-02-20T09:34:31Z"
-  },
-  {
-    "additions": 415,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Initial ty integration. To avoid a gigantic, risky patch, let's start with a baby step where we add the tooling to make repo-check and activate it on a subset of the repo. That gives us a human-readable patch, and allows us to get conforta\u2026",
-    "changed_files": 25,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44167",
-    "created_at": "2026-02-20T07:39:44Z",
-    "deletions": 210,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44167/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44167",
-    "labels": [],
-    "merged": true,
-    "number": 44167,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "chore(typing): initial ty integration",
-    "updated_at": "2026-02-20T19:08:09Z"
-  },
-  {
-    "additions": 73,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "The CI does not output useful info on this flaky test - `tests.models.olmo.test_modeling_olmo.OlmoModelTest testMethod=test_generate_with_static_cache` and makes it harder to determine the root problem when not reproducible locally. This p\u2026",
-    "changed_files": 8,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44166",
-    "created_at": "2026-02-20T07:20:15Z",
-    "deletions": 61,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44166/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44166",
-    "labels": [],
-    "merged": true,
-    "number": 44166,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Improve `has_similar_generate_outputs` assertions",
-    "updated_at": "2026-02-27T08:26:13Z"
-  },
-  {
-    "additions": 29,
-    "author": "alexmalyshev",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "With Python 3.14 and PEP 649, you can no longer expect `cls.__dict__[\"__annotations__\"]` to contain annotations for the exact class, it will be loaded lazily and can make it seem like the class doesn't have any annotations. The recommended\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44165",
-    "created_at": "2026-02-20T04:47:07Z",
-    "deletions": 14,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44165/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44165",
-    "labels": [],
-    "merged": false,
-    "number": 44165,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix how PreTrainedModel checks annotations on Python 3.14+",
-    "updated_at": "2026-03-20T17:31:11Z"
-  },
-  {
-    "additions": 7,
-    "author": "lhallee",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes: https://github.com/huggingface/transformers/issues/44162 @ArthurZucker @Cyrilvallez",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44163",
-    "created_at": "2026-02-19T21:44:25Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44163/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44163",
-    "labels": [],
-    "merged": true,
-    "number": 44163,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "ESM2 attention_mask and token_dropout fix",
-    "updated_at": "2026-02-20T15:17:31Z"
-  },
-  {
-    "additions": 379,
-    "author": "cogniera",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "What does this PR do? This PR refactors the LongT5 model to use the @capture_outputs and @can_return_tuple decorators for standardized output handling across the model stack. The refactor removes manual handling of: output_attentions outpu\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44161",
-    "created_at": "2026-02-19T20:46:49Z",
-    "deletions": 170,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44161/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44161",
-    "labels": [],
-    "merged": false,
-    "number": 44161,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor LongT5 to use @capture_outputs and @can_return_tuple decorators for unified output handling (Fixes #43979)",
-    "updated_at": "2026-02-20T17:28:12Z"
-  },
-  {
-    "additions": 2104,
-    "author": "molbap",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Hey yall, I started porting the pi0 model so Transformers can be a backend for VLAs directly. I checked it against LeRobot on fix/lerobot_openpi: outputs seem to match and for sure lerobot/pi0_base loads cleanly (no\u2026",
-    "changed_files": 22,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44160",
-    "created_at": "2026-02-19T17:16:29Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44160/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44160",
-    "labels": [
-      "New model"
-    ],
-    "merged": true,
-    "number": 44160,
-    "review_comments_count": 32,
-    "state": "closed",
-    "title": "Add model lerobot PI0 to transformers",
-    "updated_at": "2026-03-16T10:23:14Z"
-  },
-  {
-    "additions": 67,
-    "author": "samuelleecong",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary Closes #28103 - Refactor `OwlViTAttention` to use `ALL_ATTENTION_FUNCTIONS` for dynamic attention backend dispatch (same pattern as CLIP) - Add `eager_attention_forward` standalone function with the standardized interface - Resh\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44159",
-    "created_at": "2026-02-19T16:31:44Z",
-    "deletions": 61,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44159/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44159",
-    "labels": [],
-    "merged": false,
-    "number": 44159,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Add SDPA and Flash Attention support for OWL-ViT",
-    "updated_at": "2026-02-24T12:53:10Z"
-  },
-  {
-    "additions": 141,
-    "author": "leopold-tzafon",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Fix issue where `use_cache=False`, corrupts model Qwen3vl output. Tested with: ``` import torch from transformers import Qwen3VLForConditionalGeneration, AutoProcessor MODEL_NAME = \"Qwen/Qwen3-VL-4B-Instruct\" DEVICE = \"cuda\" DTYPE = torc\u2026",
-    "changed_files": 9,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 19,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44158",
-    "created_at": "2026-02-19T15:45:13Z",
-    "deletions": 36,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44158/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44158",
-    "labels": [],
-    "merged": true,
-    "number": 44158,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix bug with position_ids on qwen3-vl models, such that position_ids include text position",
-    "updated_at": "2026-02-23T14:53:33Z"
-  },
-  {
-    "additions": 689,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, gets rid of `if/else` per attn implementation",
-    "changed_files": 24,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44157",
-    "created_at": "2026-02-19T14:49:49Z",
-    "deletions": 834,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44157/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44157",
-    "labels": [],
-    "merged": false,
-    "number": 44157,
-    "review_comments_count": 10,
-    "state": "open",
-    "title": "Use correct mask for packed inputs in Qwen-VL ",
-    "updated_at": "2026-02-24T13:13:43Z"
-  },
-  {
-    "additions": 2,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes a minor error when using aqml quantization. We specified the wrong argument.",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44156",
-    "created_at": "2026-02-19T14:35:38Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44156/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44156",
-    "labels": [],
-    "merged": false,
-    "number": 44156,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix aqml `modules_to_not_convert`",
-    "updated_at": "2026-03-10T17:22:36Z"
-  },
-  {
-    "additions": 44,
-    "author": "Aatman09",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44154",
-    "created_at": "2026-02-19T12:17:56Z",
-    "deletions": 52,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44154/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44154",
-    "labels": [],
-    "merged": false,
-    "number": 44154,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactored vits to match standardized output collection interface",
-    "updated_at": "2026-02-19T12:18:56Z"
-  },
-  {
-    "additions": 79,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44152",
-    "created_at": "2026-02-19T09:37:51Z",
-    "deletions": 45,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44152/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44152",
-    "labels": [],
-    "merged": true,
-    "number": 44152,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "AutoGrad support for grouped_mm fallback",
-    "updated_at": "2026-02-20T11:15:23Z"
-  },
-  {
-    "additions": 58,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the BioGPT m\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44151",
-    "created_at": "2026-02-19T06:55:43Z",
-    "deletions": 134,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44151/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44151",
-    "labels": [],
-    "merged": false,
-    "number": 44151,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor BioGPT output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:01Z"
-  },
-  {
-    "additions": 22,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the MPT mode\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44150",
-    "created_at": "2026-02-19T06:54:09Z",
-    "deletions": 73,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44150/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44150",
-    "labels": [],
-    "merged": false,
-    "number": 44150,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor MPT output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:02Z"
-  },
-  {
-    "additions": 85,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CPMAnt m\u2026",
-    "changed_files": 4,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44149",
-    "created_at": "2026-02-19T06:51:06Z",
-    "deletions": 201,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44149/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44149",
-    "labels": [],
-    "merged": false,
-    "number": 44149,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor CPMAnt output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:03Z"
-  },
-  {
-    "additions": 33,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the Bros mod\u2026",
-    "changed_files": 4,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44148",
-    "created_at": "2026-02-19T06:46:24Z",
-    "deletions": 124,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44148/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44148",
-    "labels": [],
-    "merged": false,
-    "number": 44148,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor Bros output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:05Z"
-  },
-  {
-    "additions": 11,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CTRL mod\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44147",
-    "created_at": "2026-02-19T06:45:32Z",
-    "deletions": 47,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44147/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44147",
-    "labels": [],
-    "merged": false,
-    "number": 44147,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor CTRL output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:06Z"
-  },
-  {
-    "additions": 38,
-    "author": "khushali9",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? When using a step-based evaluation strategy (IntervalStrategy.STEPS), the trainer may skip evaluation at the final step if the last step does not align with eval_steps. This avoids missing the final evaluation while\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44146",
-    "created_at": "2026-02-19T05:29:21Z",
-    "deletions": 11,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44146/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44146",
-    "labels": [],
-    "merged": false,
-    "number": 44146,
-    "review_comments_count": 14,
-    "state": "open",
-    "title": "Ensure final evaluation runs with step-based evaluation strategy",
-    "updated_at": "2026-03-25T23:48:43Z"
-  },
-  {
-    "additions": 398,
-    "author": "balvisio",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for sequence packing in the ESM2 model. Currently, the RotaryEmbedding class of the ESM2 model supports BSHD format. This PR makes the RotayEmbedding class aware of the`position_ids` and builds\u2026",
-    "changed_files": 8,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44145",
-    "created_at": "2026-02-19T02:58:50Z",
-    "deletions": 216,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44145/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44145",
-    "labels": [],
-    "merged": false,
-    "number": 44145,
-    "review_comments_count": 9,
-    "state": "open",
-    "title": "Add THD support in ESM",
-    "updated_at": "2026-03-25T17:19:53Z"
-  },
-  {
-    "additions": 1481,
-    "author": "TinderZ",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds 5 Chinese translations for common NLP task tutorials that were missing from the `docs/source/zh/tasks/` directory. The following files are added: - `tasks/sequence_classification.md` - \u6587\u672c\u5206\u7c7b - `tasks/tok\u2026",
-    "changed_files": 6,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44144",
-    "created_at": "2026-02-19T02:35:08Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44144/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44144",
-    "labels": [],
-    "merged": true,
-    "number": 44144,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[docs] Add Chinese translations for common NLP task tutorials",
-    "updated_at": "2026-02-20T16:50:29Z"
-  },
-  {
-    "additions": 2,
-    "author": "nightcityblade",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes minor typos in the `GenerationConfig` class docstring: - \"overriden\" \u2192 \"overridden\" - \"field that are\" \u2192 \"fields that are\" - \"Arg:\" \u2192 \"Args:\" (consistent with the rest of the docstring) No code changes, docum\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44143",
-    "created_at": "2026-02-18T23:07:23Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44143/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44143",
-    "labels": [],
-    "merged": true,
-    "number": 44143,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[docs] Fix typos in GenerationConfig docstring",
-    "updated_at": "2026-02-19T13:24:09Z"
-  },
-  {
-    "additions": 72,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? so @Deep-unlearning noticed, benchmarking for the Open ASR leaderbaord, that the current implem is particularly slow. That would make sense since we go through every layer of the encoder forward, and that the stream\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44142",
-    "created_at": "2026-02-18T21:44:11Z",
-    "deletions": 12,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44142/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44142",
-    "labels": [],
-    "merged": false,
-    "number": 44142,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[voxtral-realtime] get more perfs!",
-    "updated_at": "2026-02-23T17:25:45Z"
-  },
-  {
-    "additions": 42,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44102 (original account: @fumadari). ## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44141",
-    "created_at": "2026-02-18T21:14:53Z",
-    "deletions": 154,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44141/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44141",
-    "labels": [],
-    "merged": false,
-    "number": 44141,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor ibert output tracing with capture_outputs",
-    "updated_at": "2026-02-22T02:28:47Z"
-  },
-  {
-    "additions": 66,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44104 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs`\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44140",
-    "created_at": "2026-02-18T21:14:50Z",
-    "deletions": 207,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44140/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44140",
-    "labels": [],
-    "merged": false,
-    "number": 44140,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor megatron_bert to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:48Z"
-  },
-  {
-    "additions": 39,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44105 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44139",
-    "created_at": "2026-02-18T21:14:46Z",
-    "deletions": 127,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44139/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44139",
-    "labels": [],
-    "merged": false,
-    "number": 44139,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor lilt to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:48Z"
-  },
-  {
-    "additions": 51,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44106 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44138",
-    "created_at": "2026-02-18T21:14:42Z",
-    "deletions": 132,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44138/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44138",
-    "labels": [],
-    "merged": false,
-    "number": 44138,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor yoso to use automatic output tracing",
-    "updated_at": "2026-02-22T02:28:49Z"
-  },
-  {
-    "additions": 43,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44107 (original account: @fumadari). ## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44137",
-    "created_at": "2026-02-18T21:14:39Z",
-    "deletions": 113,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44137/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44137",
-    "labels": [],
-    "merged": false,
-    "number": 44137,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(mra): use output tracing decorators",
-    "updated_at": "2026-02-22T02:28:50Z"
-  },
-  {
-    "additions": 37,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44108 (original account: @fumadari). ## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44136",
-    "created_at": "2026-02-18T21:14:35Z",
-    "deletions": 86,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44136/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44136",
-    "labels": [],
-    "merged": false,
-    "number": 44136,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(vitdet): use output tracing decorators",
-    "updated_at": "2026-02-22T02:28:50Z"
-  },
-  {
-    "additions": 48,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44109 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `H\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44135",
-    "created_at": "2026-02-18T21:14:31Z",
-    "deletions": 87,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44135/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44135",
-    "labels": [],
-    "merged": false,
-    "number": 44135,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:51Z"
-  },
-  {
-    "additions": 28,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44110 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44134",
-    "created_at": "2026-02-18T21:14:27Z",
-    "deletions": 101,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44134/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44134",
-    "labels": [],
-    "merged": false,
-    "number": 44134,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(tvp): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:51Z"
-  },
-  {
-    "additions": 30,
-    "author": "dario-fumarola",
-    "author_association": "NONE",
-    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44111 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` dec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44133",
-    "created_at": "2026-02-18T21:12:22Z",
-    "deletions": 59,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44133/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44133",
-    "labels": [],
-    "merged": false,
-    "number": 44133,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(poolformer): use capture_outputs for output tracing",
-    "updated_at": "2026-02-22T02:28:52Z"
-  },
-  {
-    "additions": 13,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? #43674 broke voxtral processor",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44132",
-    "created_at": "2026-02-18T20:13:15Z",
-    "deletions": 34,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44132/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44132",
-    "labels": [],
-    "merged": true,
-    "number": 44132,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "[voxtral] fix voxtral proc",
-    "updated_at": "2026-02-19T16:41:53Z"
-  },
-  {
-    "additions": 2,
-    "author": "cluster2600",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What Two small corrections in `docs/source/en/quantization/overview.md`: 1. **Typo fix**: `AuoQuant Notebook` \u2192 `AutoQuant Notebook` in the *User-Friendly Quantization Tools* section. The letter `t` was missing from the link text. 2. **\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44131",
-    "created_at": "2026-02-18T19:25:52Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44131/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44131",
-    "labels": [],
-    "merged": true,
-    "number": 44131,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "docs: fix typo 'AuoQuant' \u2192 'AutoQuant' and clarify FINEGRAINED_FP8 library column",
-    "updated_at": "2026-02-18T20:49:47Z"
-  },
-  {
-    "additions": 302,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This PR is the first big step towards removing the `cache_position` everywhere, as they are not needed in general and everything can be inferred from the cache itself. The major changes are the fol\u2026",
-    "changed_files": 23,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44130",
-    "created_at": "2026-02-18T11:58:54Z",
-    "deletions": 886,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44130/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44130",
-    "labels": [],
-    "merged": true,
-    "number": 44130,
-    "review_comments_count": 16,
+    "number": 44437,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "[generate] Completely stop relying on `cache_position` to prepare inputs",
-    "updated_at": "2026-02-20T18:46:19Z"
+    "title": "protect imports",
+    "updated_at": "2026-03-04T16:19:49Z"
   },
   {
-    "additions": 76,
-    "author": "preetam1407",
+    "additions": 8,
+    "author": "jw9603",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors SpeechT5 to the standardized output tracing interface. - Adds `@capture_outputs` to base encoder/decoder forwards. - Adds `_can_record_outputs` mappings for hidden states and attentions. - Adds `@can_retur\u2026",
+    "body_excerpt": "Fixes #44423 `continuous_batching_chat_completion` was missing input preprocessing and `tokenize=True` in `apply_chat_template`, causing `'str' object has no attribute 'to'` for multimodal models. Added the same `get_model_modality` + `get\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44129",
-    "created_at": "2026-02-18T11:24:13Z",
-    "deletions": 222,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44129/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44129",
-    "labels": [],
-    "merged": false,
-    "number": 44129,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor SpeechT5 output tracing to standardized output capture",
-    "updated_at": "2026-02-18T11:25:19Z"
-  },
-  {
-    "additions": 59,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Just makes sure we trigger dev version update",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44128",
-    "created_at": "2026-02-18T10:42:21Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44128/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44128",
-    "labels": [],
-    "merged": false,
-    "number": 44128,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "update release workflow",
-    "updated_at": "2026-03-26T09:48:44Z"
-  },
-  {
-    "additions": 3,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "when the model_type isn't in `TOKENIZER_MAPPING_NAMES` (ex. \"llama\"), `TOKENIZER_MAPPING_NAMES.get(\"llama\", \"\")` --> \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026",
-    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44127",
-    "created_at": "2026-02-18T10:41:48Z",
-    "deletions": 8,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44436",
+    "created_at": "2026-03-04T15:26:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44127/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44127",
+    "files_url": "https://github.com/huggingface/transformers/pull/44436/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44436",
     "labels": [],
     "merged": true,
-    "number": 44127,
-    "review_comments_count": 0,
+    "number": 44436,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "AutoTokenizer ignores config when model_type is None",
-    "updated_at": "2026-02-18T14:47:52Z"
+    "title": "Fix continuous batching for multimodal models",
+    "updated_at": "2026-03-09T13:58:37Z"
   },
   {
-    "additions": 17,
-    "author": "Cyrilvallez",
+    "additions": 138,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848",
-    "changed_files": 2,
+    "body_excerpt": "This PR adds the option to have a ContinuousBatchingManager not be destroyed after generation is over. This allows the user to re-use the manager without requiring him to know any other entry point for CB apart from `generate_batch` or the\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44126",
-    "created_at": "2026-02-18T09:58:49Z",
-    "deletions": 40,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44435",
+    "created_at": "2026-03-04T14:17:08Z",
+    "deletions": 54,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44126/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44126",
+    "files_url": "https://github.com/huggingface/transformers/pull/44435/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44435",
     "labels": [],
     "merged": true,
-    "number": 44126,
-    "review_comments_count": 0,
+    "number": 44435,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Simplify input preparation in generate",
-    "updated_at": "2026-02-18T10:30:48Z"
+    "title": "[CB] Persistent manager",
+    "updated_at": "2026-03-26T22:02:28Z"
   },
   {
-    "additions": 8,
-    "author": "zucchini-nlp",
+    "additions": 413,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986",
-    "changed_files": 1,
+    "body_excerpt": "This PR adds a dedicated config for continuous batching, which is starting to have a lot parameters. This will give the user a clear view of what is possible and make adding new parameters easier. No breaking changes through `account_for_c\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44125",
-    "created_at": "2026-02-18T09:34:54Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44434",
+    "created_at": "2026-03-04T13:49:05Z",
+    "deletions": 303,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44125/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44125",
+    "files_url": "https://github.com/huggingface/transformers/pull/44434/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44434",
     "labels": [],
     "merged": true,
-    "number": 44125,
-    "review_comments_count": 2,
+    "number": 44434,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Raise informative error when loading video processors",
-    "updated_at": "2026-02-20T08:23:35Z"
+    "title": "[CB] Add dedicated config",
+    "updated_at": "2026-03-13T13:56:40Z"
   },
   {
-    "additions": 10,
-    "author": "mariam851",
+    "additions": 177,
+    "author": "leopold-tzafon",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Instead of silently failing when mm_token_type_ids is not passed, derives it in Qwen3 and Qwen3.5. Same as it was before: https://github.com/huggingface/transformers/commit/c281a2de8998e66e93fac30a236225528531df9b P\u2026",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44124",
-    "created_at": "2026-02-18T08:52:23Z",
-    "deletions": 0,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44433",
+    "created_at": "2026-03-04T13:46:14Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44124/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44124",
+    "files_url": "https://github.com/huggingface/transformers/pull/44433/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44433",
     "labels": [],
-    "merged": false,
-    "number": 44124,
+    "merged": true,
+    "number": 44433,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: add eval_on_end to Trainer for final evaluation",
-    "updated_at": "2026-02-18T14:14:16Z"
+    "title": "fix: raise error if mm_token_type_ids not supplied ",
+    "updated_at": "2026-03-12T17:12:47Z"
   },
   {
-    "additions": 15,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.",
-    "changed_files": 1,
+    "additions": 85,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, instead of having to divide image token by norm scale, we can do it same way as in other model (eg. gemma3) and add a custom embed layer. It should be 100% BC because users usually call `self.embed_tok\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44123",
-    "created_at": "2026-02-18T08:22:57Z",
-    "deletions": 21,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44432",
+    "created_at": "2026-03-04T10:04:40Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44123/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44123",
+    "files_url": "https://github.com/huggingface/transformers/pull/44432/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44432",
     "labels": [],
-    "merged": false,
-    "number": 44123,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Avoid device sync in training loss accumulation",
-    "updated_at": "2026-02-20T04:43:19Z"
-  },
-  {
-    "additions": 158,
-    "author": "adityuhkapoor",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44122",
-    "created_at": "2026-02-18T06:35:09Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44122/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44122",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44122,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add BnB 4-bit embedding quantization support",
-    "updated_at": "2026-02-18T14:27:25Z"
-  },
-  {
-    "additions": 14,
-    "author": "tirth8205",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44120",
-    "created_at": "2026-02-17T23:56:48Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44120/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44120",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44120,
+    "merged": true,
+    "number": 44432,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: allow image_transforms.resize to handle negative values after normalization",
-    "updated_at": "2026-02-18T14:08:54Z"
+    "title": "Make paligemma embed tokens standard",
+    "updated_at": "2026-03-11T08:38:41Z"
   },
   {
-    "additions": 1,
-    "author": "tirth8205",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026",
-    "changed_files": 1,
+    "additions": 4103,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Re-opening back a PR on cleaning up clip-like model's backbones. Let's merge it now, I've been seeing quite a lot of ppl reporting it and I am not sure when it will be resolved by the big vision refactor Basically,\u2026",
+    "changed_files": 42,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44119",
-    "created_at": "2026-02-17T23:53:20Z",
-    "deletions": 1,
+    "comments_count": 21,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44431",
+    "created_at": "2026-03-04T10:02:13Z",
+    "deletions": 2230,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44119/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44119",
+    "files_url": "https://github.com/huggingface/transformers/pull/44431/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44431",
     "labels": [],
     "merged": false,
-    "number": 44119,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer",
-    "updated_at": "2026-02-18T14:04:47Z"
+    "number": 44431,
+    "review_comments_count": 92,
+    "state": "open",
+    "title": "Refactor CLIP-like models",
+    "updated_at": "2026-04-02T16:15:36Z"
   },
   {
-    "additions": 32,
-    "author": "tirth8205",
+    "additions": 0,
+    "author": "Rohang2005",
     "author_association": "NONE",
-    "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## What does this PR do? This PR fixes an inconsistency in the AFMoE module where `past_key_values` was passed to a function argument expecting `past_key_value`. The function signature expects a singular cache object (`past_key_value`), bu\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44118",
-    "created_at": "2026-02-17T23:31:31Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44430",
+    "created_at": "2026-03-04T08:13:38Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44118/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44118",
+    "files_url": "https://github.com/huggingface/transformers/pull/44430/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44430",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44118,
+    "number": 44430,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields",
-    "updated_at": "2026-02-18T14:18:12Z"
+    "title": "Fix inconsistent past_key_value/past_key_values usage in AFMoE modeling",
+    "updated_at": "2026-03-04T14:07:32Z"
   },
   {
-    "additions": 27,
-    "author": "dtiourine",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026",
+    "additions": 14,
+    "author": "thakoreh",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module was using `PALETTE['italic']` and `PALETTE['bold']` directly in string formatting, which caused ANSI escape codes to be emitted even when stdout is not connected to a terminal (e.g., when\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44116",
-    "created_at": "2026-02-17T21:52:13Z",
-    "deletions": 102,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44429",
+    "created_at": "2026-03-04T07:47:02Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44116/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44116",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44429/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44429",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44116,
+    "number": 44429,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface",
-    "updated_at": "2026-02-17T21:53:23Z"
+    "state": "closed",
+    "title": "Fix ANSI codes emitted in loading_report when stdout is not a TTY",
+    "updated_at": "2026-03-04T13:58:46Z"
   },
   {
-    "additions": 2,
-    "author": "Deep-unlearning",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026",
+    "additions": 10,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@IlyasMoutawwakil pls help review, thx!",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44115",
-    "created_at": "2026-02-17T21:32:55Z",
-    "deletions": 2,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44428",
+    "created_at": "2026-03-04T07:41:20Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44115/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44115",
+    "files_url": "https://github.com/huggingface/transformers/pull/44428/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44428",
     "labels": [],
     "merged": true,
-    "number": 44115,
-    "review_comments_count": 0,
+    "number": 44428,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "[docs] fix broken chat_templating links in tasks docs",
-    "updated_at": "2026-02-23T16:27:57Z"
+    "title": "Add XPU Expectations for vibe voice acoustic tokenizer tests",
+    "updated_at": "2026-04-02T03:21:38Z"
   },
   {
-    "additions": 716,
-    "author": "23atharvaS",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026",
-    "changed_files": 19,
+    "additions": 43,
+    "author": "Jaredw2289-svg",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44297 ## Problem `tokenizer.save_pretrained()` overwrites `tokenizer_class` in `tokenizer_config.json` with the current wrapper class (e.g. `PreTrainedTokenizerFast`) instead of preserving the original class from the loaded config (\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44114",
-    "created_at": "2026-02-17T21:17:35Z",
-    "deletions": 1237,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44427",
+    "created_at": "2026-03-04T06:03:56Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44114/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44114",
+    "files_url": "https://github.com/huggingface/transformers/pull/44427/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44427",
     "labels": [],
     "merged": false,
-    "number": 44114,
+    "number": 44427,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators",
-    "updated_at": "2026-02-18T20:34:53Z"
+    "state": "closed",
+    "title": "fix(tokenization): preserve original tokenizer_class in save_pretrained",
+    "updated_at": "2026-03-11T02:59:12Z"
   },
   {
-    "additions": 5,
-    "author": "harshaljanjani",
+    "additions": 29,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026",
+    "body_excerpt": "@IlyasMoutawwakil Can you help review? Thx!",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44113",
-    "created_at": "2026-02-17T20:01:32Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44426",
+    "created_at": "2026-03-04T05:57:34Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44113/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44113",
+    "files_url": "https://github.com/huggingface/transformers/pull/44426/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44426",
     "labels": [],
     "merged": true,
-    "number": 44113,
+    "number": 44426,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "fix(testing): Update stale device override test in GraniteSpeech",
-    "updated_at": "2026-02-19T11:24:29Z"
-  },
-  {
-    "additions": 30,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44111",
-    "created_at": "2026-02-17T19:38:02Z",
-    "deletions": 59,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44111/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44111",
-    "labels": [],
-    "merged": false,
-    "number": 44111,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(poolformer): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:22Z"
+    "title": "update the expected output for qwen2_5_vl w/ pytorch 2.10 XPU",
+    "updated_at": "2026-03-04T09:55:55Z"
   },
   {
-    "additions": 28,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026",
+    "additions": 1,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "I believe the second `if` should be `elif` so the else branch only triggers when neither the string-truncation NOR the float-formatting conditions apply. Otherwise it overwrites the truncation message with the original long string.",
     "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44110",
-    "created_at": "2026-02-17T19:32:55Z",
-    "deletions": 101,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44110/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44110",
-    "labels": [],
-    "merged": false,
-    "number": 44110,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(tvp): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:24Z"
-  },
-  {
-    "additions": 48,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44109",
-    "created_at": "2026-02-17T19:23:03Z",
-    "deletions": 87,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44425",
+    "created_at": "2026-03-04T02:48:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44109/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44109",
+    "files_url": "https://github.com/huggingface/transformers/pull/44425/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44425",
     "labels": [],
     "merged": false,
-    "number": 44109,
+    "number": 44425,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
-    "updated_at": "2026-02-18T21:19:25Z"
+    "state": "open",
+    "title": "Fix conditional check for float formatting",
+    "updated_at": "2026-03-04T02:48:41Z"
   },
   {
-    "additions": 33,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026",
+    "additions": 6,
+    "author": "jw9603",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes `AttributeError: 'str' object has no attribute 'to'` when using `transformers serve --continuous-batching` with multimodal models like Qwen3.5-9B. `processor.apply_chat_template()` returns a plain string (not\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44108",
-    "created_at": "2026-02-17T19:15:00Z",
-    "deletions": 82,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44424",
+    "created_at": "2026-03-04T00:56:08Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44108/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44108",
+    "files_url": "https://github.com/huggingface/transformers/pull/44424/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44424",
     "labels": [],
     "merged": false,
-    "number": 44108,
+    "number": 44424,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(vitdet): use output tracing decorators",
-    "updated_at": "2026-02-18T21:19:27Z"
+    "title": "Fix `transformers serve --continuous-batching` for multimodal models",
+    "updated_at": "2026-03-05T09:16:25Z"
   },
   {
-    "additions": 40,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-44107-10",
-    "cluster_ids": [
-      "cluster-44107-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44107",
-    "created_at": "2026-02-17T19:04:42Z",
-    "deletions": 112,
+    "additions": 117,
+    "author": "mitre88",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds a Spanish (es) translation of the `conversations.md` guide, which covers the fundamentals of using chat models in Transformers. ### Translated sections: - Chat CLI usage - TextGenerationPipeline in chat mode -\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44422",
+    "created_at": "2026-03-04T00:42:43Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44107/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44107",
+    "files_url": "https://github.com/huggingface/transformers/pull/44422/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44422",
     "labels": [],
-    "merged": false,
-    "number": 44107,
+    "merged": true,
+    "number": 44422,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor(mra): use output tracing decorators",
-    "updated_at": "2026-02-18T21:19:29Z"
+    "title": "docs: add Spanish translation for conversations.md (chat basics)",
+    "updated_at": "2026-03-04T16:45:24Z"
   },
   {
-    "additions": 47,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
-    "changed_files": 1,
+    "additions": 309,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? When we shard weights according to a TP plan, we do not update the corresponding parent module attributes. For instance if we shard the weight of a `torch.nn.Linear`, we should also update its `in_features` or `out_\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44106",
-    "created_at": "2026-02-17T18:59:25Z",
-    "deletions": 132,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44421",
+    "created_at": "2026-03-03T22:51:47Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44106/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44106",
+    "files_url": "https://github.com/huggingface/transformers/pull/44421/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44421",
     "labels": [],
-    "merged": false,
-    "number": 44106,
+    "merged": true,
+    "number": 44421,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor yoso to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:30Z"
+    "title": "Update parent module attributes when sharding with TP",
+    "updated_at": "2026-03-05T23:32:06Z"
   },
   {
-    "additions": 39,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
-    "changed_files": 1,
+    "additions": 249,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- removes \"Number of accelerators\" section from \"Accelerator selection\" guide since this is probably pretty commonly known - add a new \"DDP\" guide - refactored \"Accelerate\" guide with a more focused overview of what it is and how to config\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44105",
-    "created_at": "2026-02-17T18:54:40Z",
-    "deletions": 127,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44420",
+    "created_at": "2026-03-03T22:41:59Z",
+    "deletions": 250,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44105/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44105",
+    "files_url": "https://github.com/huggingface/transformers/pull/44420/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44420",
     "labels": [],
     "merged": false,
-    "number": 44105,
+    "number": 44420,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor lilt to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:32Z"
+    "state": "open",
+    "title": "[docs] distributed training",
+    "updated_at": "2026-03-11T17:36:12Z"
   },
   {
-    "additions": 66,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026",
+    "additions": 6,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? To be merged after #44302 and https://github.com/huggingface/kernels/pull/285. It adds the `neuron` device in checks for custom kernels, enabling to load kernels for Neuron devices.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44104",
-    "created_at": "2026-02-17T18:43:44Z",
-    "deletions": 207,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44417",
+    "created_at": "2026-03-03T20:15:26Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44104/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44104",
+    "files_url": "https://github.com/huggingface/transformers/pull/44417/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44417",
     "labels": [],
-    "merged": false,
-    "number": 44104,
+    "merged": true,
+    "number": 44417,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor megatron_bert to use automatic output tracing",
-    "updated_at": "2026-02-18T21:19:34Z"
+    "title": "Neuron kernels integration",
+    "updated_at": "2026-03-05T17:09:39Z"
   },
   {
-    "additions": 53,
-    "author": "engmohamedsalah",
-    "author_association": "NONE",
-    "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026",
-    "changed_files": 3,
+    "additions": 1,
+    "author": "tyler-romero",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Register `olmo_hybrid` in `TOKENIZER_MAPPING_NAMES` so auto-tokenizer resolution works, matching the other auto-registrations already in place for this model.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44103",
-    "created_at": "2026-02-17T18:04:48Z",
-    "deletions": 76,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44416",
+    "created_at": "2026-03-03T19:30:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44103/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44103",
+    "files_url": "https://github.com/huggingface/transformers/pull/44416/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44416",
     "labels": [],
-    "merged": false,
-    "number": 44103,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44416,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix glm_moe_dsa",
-    "updated_at": "2026-02-18T19:38:11Z"
+    "title": "[tiny] Add olmo_hybrid to tokenizer auto-mapping",
+    "updated_at": "2026-03-04T19:26:10Z"
   },
   {
-    "additions": 42,
-    "author": "fumadari",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026",
+    "additions": 2,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR removes @MekkCyber from the PR template. cc @Rocketknight1 you only need to ping me now ;)",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44102",
-    "created_at": "2026-02-17T17:21:32Z",
-    "deletions": 154,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44415",
+    "created_at": "2026-03-03T16:59:08Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44102/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44102",
+    "files_url": "https://github.com/huggingface/transformers/pull/44415/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44415",
     "labels": [],
-    "merged": false,
-    "number": 44102,
+    "merged": true,
+    "number": 44415,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ibert output tracing with capture_outputs",
-    "updated_at": "2026-02-18T21:19:35Z"
+    "title": "Update PR template",
+    "updated_at": "2026-03-04T14:13:04Z"
   },
   {
-    "additions": 210,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026",
-    "changed_files": 2,
+    "additions": 35,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44303 - see also comments here https://github.com/huggingface/transformers/pull/44316#issuecomment-3984362089. Supersedes https://github.com/huggingface/trans\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44414",
+    "created_at": "2026-03-03T16:47:47Z",
+    "deletions": 39,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44414/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44414",
+    "labels": [],
+    "merged": true,
+    "number": 44414,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Reduce tqdm verbosity during model loading",
+    "updated_at": "2026-03-03T16:57:56Z"
+  },
+  {
+    "additions": 4,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title.",
+    "changed_files": 1,
+    "cluster_id": "cluster-44053-8",
+    "cluster_ids": [
+      "cluster-44053-8"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44101",
-    "created_at": "2026-02-17T17:15:06Z",
-    "deletions": 194,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44413",
+    "created_at": "2026-03-03T16:24:43Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44101/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44101",
+    "files_url": "https://github.com/huggingface/transformers/pull/44413/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44413",
     "labels": [],
-    "merged": false,
-    "number": 44101,
+    "merged": true,
+    "number": 44413,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture",
-    "updated_at": "2026-02-19T08:08:33Z"
+    "state": "closed",
+    "title": "Fix peft conversion mappings",
+    "updated_at": "2026-03-03T17:08:39Z"
   },
   {
-    "additions": 3,
-    "author": "qgallouedec",
+    "additions": 138,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Extends type checking to `src/transformers/quantizers`",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44100",
-    "created_at": "2026-02-17T17:10:36Z",
-    "deletions": 3,
+    "comments_count": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44412",
+    "created_at": "2026-03-03T14:53:31Z",
+    "deletions": 74,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44100/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44100",
+    "files_url": "https://github.com/huggingface/transformers/pull/44412/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44412",
     "labels": [],
     "merged": true,
-    "number": 44100,
-    "review_comments_count": 0,
+    "number": 44412,
+    "review_comments_count": 33,
     "state": "closed",
-    "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps",
-    "updated_at": "2026-02-20T09:57:51Z"
+    "title": "chore(typing): Add type checking to `src/transformers/quantizers`",
+    "updated_at": "2026-03-11T11:24:11Z"
   },
   {
-    "additions": 2,
-    "author": "qgallouedec",
+    "additions": 59,
+    "author": "burtenshaw",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR updates `AGENTS.md` to discourage duplicated and trivial work by agents. - CLAUDE.md-> AGENTS.md - ssue-thread coordination before PRs - mandatory duplicate-PR checks with gh commands - no one-off busywork P\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44099",
-    "created_at": "2026-02-17T16:45:35Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44411",
+    "created_at": "2026-03-03T11:35:13Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44099/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44099",
+    "files_url": "https://github.com/huggingface/transformers/pull/44411/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44411",
     "labels": [],
     "merged": true,
-    "number": 44099,
-    "review_comments_count": 0,
+    "number": 44411,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Bump dev version",
-    "updated_at": "2026-02-18T10:03:54Z"
+    "title": "Update agentic contributions guidelines in AGENTS.md to force yielding.",
+    "updated_at": "2026-03-12T09:28:45Z"
   },
   {
-    "additions": 125,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR refactors ViLT's output handling to align with the standardized patterns used across the codebase. Key changes: - Removes manual `hidden_states`/`attentions` propagation and passes `output_attentions`, `out\u2026",
+    "additions": 38,
+    "author": "zvik",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR allows the Granite-speech model to use hidden states from the encoder hidden layers. This is an internal model option that is required for the next generation of Granite-speech models. ## Changes: - New conf\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44098",
-    "created_at": "2026-02-17T16:32:34Z",
-    "deletions": 138,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44408",
+    "created_at": "2026-03-03T07:50:39Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44098/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44098",
+    "files_url": "https://github.com/huggingface/transformers/pull/44408/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44408",
     "labels": [],
     "merged": false,
-    "number": 44098,
-    "review_comments_count": 0,
+    "number": 44408,
+    "review_comments_count": 5,
     "state": "open",
-    "title": "[ViLT] Refactor output handling to align with standardized patterns",
-    "updated_at": "2026-02-17T16:37:46Z"
+    "title": "Add option to export  encoder hidden states for Granite-speech",
+    "updated_at": "2026-03-04T10:49:49Z"
   },
   {
-    "additions": 12,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "The `test_keep_in_fp32_modules` issues in #44052 are because the test assumes a model has **either** `_keep_in_fp32_modules` or `_keep_in_fp32_modules_strict` **but not both.** The only model that uses both is `glm_moe_dsa`, so this is the\u2026",
-    "changed_files": 2,
+    "additions": 23,
+    "author": "hongping-zh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Adds an \"Energy Efficiency Considerations\" section to the bitsandbytes quantization documentation, providing practical guidance on the energy implications of different quantization configurations. ## Motivation This addresses th\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44097",
-    "created_at": "2026-02-17T15:43:55Z",
-    "deletions": 42,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44407",
+    "created_at": "2026-03-03T04:42:57Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44097/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44097",
+    "files_url": "https://github.com/huggingface/transformers/pull/44407/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44407",
     "labels": [],
-    "merged": true,
-    "number": 44097,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Merge test_keep_in_fp32_modules and test_keep_in_fp32_modules_strict",
-    "updated_at": "2026-02-17T16:23:33Z"
+    "merged": false,
+    "number": 44407,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "docs: add energy efficiency considerations to bitsandbytes quantization guide",
+    "updated_at": "2026-03-25T11:53:49Z"
   },
   {
-    "additions": 3,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Tests were written (and pass) on DGX A100, here are the values for our runners.",
-    "changed_files": 1,
+    "additions": 21,
+    "author": "medhakimbedhief",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Adds sequence-classification support for Qwen3.5 in AutoModelForSequenceClassification. **What does this PR do?** This PR enables loading Qwen3.5 checkpoints with `AutoModelForSequenceClassification`, which previously failed with: `ValueEr\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44096",
-    "created_at": "2026-02-17T15:14:26Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44406",
+    "created_at": "2026-03-03T03:44:37Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44096/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44096",
+    "files_url": "https://github.com/huggingface/transformers/pull/44406/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44406",
     "labels": [],
     "merged": true,
-    "number": 44096,
+    "number": 44406,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[voxtral-realtime] update runner expected values ",
-    "updated_at": "2026-02-17T15:23:19Z"
+    "title": "Add Qwen3.5 support for sequence classification",
+    "updated_at": "2026-03-04T10:34:22Z"
   },
   {
-    "additions": 43,
-    "author": "Cyrilvallez",
+    "additions": 0,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. The check that was added in https://github.com/huggingface/transformers/pull/43768 is wrong, as a missing weight would NOT be reinitialized in some cases! As for the pointers check, it is actually\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Some generate tests have a ~1% chance of generating short outputs because they hit an EOS token early, which causes the test to flake because it asserts the output shape. This PR enforces `min_length` so that doesn't happen!",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44095",
-    "created_at": "2026-02-17T14:33:22Z",
-    "deletions": 47,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44401",
+    "created_at": "2026-03-02T18:26:56Z",
+    "deletions": 77,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44095/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44095",
+    "files_url": "https://github.com/huggingface/transformers/pull/44401/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44401",
     "labels": [],
     "merged": true,
-    "number": 44095,
-    "review_comments_count": 1,
+    "number": 44401,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Fix loading logic issue",
-    "updated_at": "2026-02-17T19:03:51Z"
+    "title": "Enforce min length in some generate tests",
+    "updated_at": "2026-03-04T14:05:50Z"
   },
   {
-    "additions": 24,
-    "author": "ManasVardhan",
+    "additions": 14,
+    "author": "Kokonico",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "body_excerpt": "# What does this PR do? As per title, confirms interactive terminal before adding formatting to loading_report output. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44094",
-    "created_at": "2026-02-17T14:15:10Z",
-    "deletions": 70,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44400",
+    "created_at": "2026-03-02T18:21:48Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44094/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44094",
+    "files_url": "https://github.com/huggingface/transformers/pull/44400/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44400",
     "labels": [],
     "merged": false,
-    "number": 44094,
+    "number": 44400,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:07Z"
+    "title": "fix: interactive terminal checks for formatting in loading_report.py",
+    "updated_at": "2026-03-09T14:46:29Z"
   },
   {
-    "additions": 28,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44093",
-    "created_at": "2026-02-17T14:15:07Z",
-    "deletions": 129,
+    "additions": 3,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44384 In `Qwen3_5TextModel.forward`, after splitting `position_ids` into `text_position_ids` (index 0, for text) and `position_ids` (indices 1:, for temporal/height/width), the decoder layer call incorrectly passed `posit\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44399",
+    "created_at": "2026-03-02T17:28:59Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44093/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44093",
+    "files_url": "https://github.com/huggingface/transformers/pull/44399/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44399",
     "labels": [],
-    "merged": false,
-    "number": 44093,
+    "merged": true,
+    "number": 44399,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:09Z"
+    "state": "closed",
+    "title": "Fix position_ids typo in Qwen3_5TextModel forward pass",
+    "updated_at": "2026-03-06T01:48:22Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "additions": 4,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Adds the missing ReLU activation in `GlmMoeDsaIndexer.forward()` on per-head q\u00b7k scores before the weighted sum across heads. The reference DeepSeek-V3.2 `fp8_index` kernel applies `T.max(logits, 0)` (i.e., ReLU) a\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44092",
-    "created_at": "2026-02-17T14:15:04Z",
-    "deletions": 159,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44398",
+    "created_at": "2026-03-02T16:41:01Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44092/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44092",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44398/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44398",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44092,
+    "number": 44398,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor output tracing for swinv2 model",
-    "updated_at": "2026-03-03T00:30:10Z"
+    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
+    "updated_at": "2026-03-04T13:54:20Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44091",
-    "created_at": "2026-02-17T14:14:56Z",
-    "deletions": 146,
+    "additions": 86,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #44380 `GPT2Attention.forward()` did not pass the `scaling` parameter to `attention_interface`, causing `scale_attn_weights` and `scale_attn_by_inverse_layer_idx` config options to be silently ignored when usi\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44397",
+    "created_at": "2026-03-02T16:14:37Z",
+    "deletions": 50,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44091/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44091",
+    "files_url": "https://github.com/huggingface/transformers/pull/44397/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44397",
     "labels": [],
-    "merged": false,
-    "number": 44091,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44397,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-03-03T00:30:11Z"
+    "title": "Fix GPT2 attention scaling ignored in SDPA/FlashAttention",
+    "updated_at": "2026-03-04T16:47:42Z"
   },
   {
-    "additions": 25,
-    "author": "itazap",
+    "additions": 3,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "bos and eos behaviour should match when updating post processor setting `add_bos_token=True` when `bos_token=None` should silently disables `add_bos_token`. (was already the behavior for `eos_token`)",
+    "body_excerpt": "# What does this PR do? Some renaming should just never be applied when the weight format already matches. (this is actually regardless of remote code). This allows us to remove 1 test added in https://github.com/huggingface/transformers/c\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44090",
-    "created_at": "2026-02-17T13:15:07Z",
-    "deletions": 4,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44396",
+    "created_at": "2026-03-02T15:50:27Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44090/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44090",
+    "files_url": "https://github.com/huggingface/transformers/pull/44396/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44396",
     "labels": [],
     "merged": true,
-    "number": 44090,
-    "review_comments_count": 0,
+    "number": 44396,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Update post proc",
-    "updated_at": "2026-02-18T15:34:18Z"
+    "title": "[ Dynamic weight loader] fix remote code when format matches",
+    "updated_at": "2026-03-03T17:53:39Z"
   },
   {
-    "additions": 113,
-    "author": "preetam1407",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
+    "additions": 153,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title!",
+    "changed_files": 6,
+    "cluster_id": "cluster-44053-8",
     "cluster_ids": [
-      "cluster-43979-28"
+      "cluster-44053-8"
     ],
     "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44089",
-    "created_at": "2026-02-17T11:37:18Z",
-    "deletions": 294,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44395",
+    "created_at": "2026-03-02T14:52:12Z",
+    "deletions": 31,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44089/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44089",
+    "files_url": "https://github.com/huggingface/transformers/pull/44395/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44395",
     "labels": [],
-    "merged": false,
-    "number": 44089,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44395,
+    "review_comments_count": 22,
     "state": "closed",
-    "title": "Refactor t5 output tracing",
-    "updated_at": "2026-02-17T13:45:23Z"
+    "title": "Fix kernels security issue",
+    "updated_at": "2026-03-03T15:28:20Z"
   },
   {
-    "additions": 41,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026",
-    "changed_files": 1,
+    "additions": 4742,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? #43514 precedes this PR",
+    "changed_files": 61,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44088",
-    "created_at": "2026-02-17T11:32:42Z",
-    "deletions": 129,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44088/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44088",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44394",
+    "created_at": "2026-03-02T14:49:05Z",
+    "deletions": 7232,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44394/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44394",
     "labels": [],
     "merged": false,
-    "number": 44088,
+    "number": 44394,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple",
-    "updated_at": "2026-02-17T11:41:32Z"
+    "state": "open",
+    "title": "\ud83d\udea8\ud83d\udea7 FeatureExtractor \u2192 AudioProcessor",
+    "updated_at": "2026-04-03T22:16:41Z"
   },
   {
-    "additions": 16,
-    "author": "huyxdang",
-    "author_association": "NONE",
-    "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026",
+    "additions": 61,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The acoustic tokenizer was updated to use `VoxtralRealtimeConv1dPaddingCache` in #43625 but the ASR model wasn't updated.",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44087",
-    "created_at": "2026-02-17T11:30:25Z",
-    "deletions": 33,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44392",
+    "created_at": "2026-03-02T13:41:19Z",
+    "deletions": 100,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44087/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44087",
+    "files_url": "https://github.com/huggingface/transformers/pull/44392/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44392",
     "labels": [],
-    "merged": false,
-    "number": 44087,
+    "merged": true,
+    "number": 44392,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor Mamba2 to use standardized output tracing",
-    "updated_at": "2026-03-11T02:08:22Z"
+    "title": "[VibeVoice ASR] Use updated padding cache for ASR model.",
+    "updated_at": "2026-03-02T13:51:02Z"
   },
   {
-    "additions": 16,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026",
-    "changed_files": 1,
+    "additions": 3040,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tokenizer was updated to Voxtral cache object in #43625, but forgot to update that of the ASR model",
+    "changed_files": 30,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44086",
-    "created_at": "2026-02-17T11:21:22Z",
-    "deletions": 48,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44086/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44086",
-    "labels": [],
-    "merged": false,
-    "number": 44086,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:22:25Z"
-  },
-  {
-    "additions": 37,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44085",
-    "created_at": "2026-02-17T11:09:55Z",
-    "deletions": 108,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44085/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44085",
-    "labels": [],
-    "merged": false,
-    "number": 44085,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor RemBERT to use output tracing decorators",
-    "updated_at": "2026-02-17T11:10:59Z"
-  },
-  {
-    "additions": 37,
-    "author": "Zephyr-Blessed",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44084",
-    "created_at": "2026-02-17T11:08:48Z",
-    "deletions": 108,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44391",
+    "created_at": "2026-03-02T13:29:59Z",
+    "deletions": 311,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44084/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44084",
+    "files_url": "https://github.com/huggingface/transformers/pull/44391/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44391",
     "labels": [],
     "merged": false,
-    "number": 44084,
+    "number": 44391,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:41:38Z"
+    "title": "[VibeVoice ASR] Use newer cache object for modular",
+    "updated_at": "2026-03-02T13:34:23Z"
   },
   {
-    "additions": 2856,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSDP wrapper. It covers the full lifecycle: model distribution, training, checkpointing, and CI\u2026",
-    "changed_files": 97,
+    "additions": 3330,
+    "author": "liding-nv",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44083",
-    "created_at": "2026-02-17T10:57:06Z",
-    "deletions": 201,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44390",
+    "created_at": "2026-03-02T13:22:21Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44083/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44083",
+    "files_url": "https://github.com/huggingface/transformers/pull/44390/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44390",
     "labels": [],
-    "merged": false,
-    "number": 44083,
-    "review_comments_count": 24,
-    "state": "open",
-    "title": "FSDP2 native support in transformers ",
-    "updated_at": "2026-03-25T17:01:02Z"
+    "merged": true,
+    "number": 44390,
+    "review_comments_count": 20,
+    "state": "closed",
+    "title": "add support for nemotron_3",
+    "updated_at": "2026-03-03T18:18:50Z"
   },
   {
-    "additions": 6,
-    "author": "Cyrilvallez",
+    "additions": 5,
+    "author": "Abdennacer-Badaoui",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Adds explicit `timm` installation to the AMD ROCm Docker image. This causes ~200 test failures in AMD CI (e.g., [gemma3n vision tests](https://github.com/huggingface/transformers/actions/runs/22474359922/job/65104428291)). This mirrors wha\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44082",
-    "created_at": "2026-02-17T10:09:03Z",
-    "deletions": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44389",
+    "created_at": "2026-03-02T13:09:55Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44082/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44082",
+    "files_url": "https://github.com/huggingface/transformers/pull/44389/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44389",
     "labels": [],
     "merged": true,
-    "number": 44082,
+    "number": 44389,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix patchtsmixer call to post_init",
-    "updated_at": "2026-02-17T11:05:40Z"
+    "title": "[AMD CI] Add missing timm dependency to ROCm Docker image",
+    "updated_at": "2026-03-03T12:00:19Z"
   },
   {
-    "additions": 48,
-    "author": "ArthurZucker",
+    "additions": 0,
+    "author": "sahilmaniyar888",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes #44336 ### Summary This PR prevents ANSI style escape sequences from being emitted by `loading_report` when stdout is non-interactive (for example, redirected logs/files). ### Changes - Added a small helper `_\u2026",
+    "changed_files": 0,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44388",
+    "created_at": "2026-03-02T11:40:49Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44388/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44388",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44388,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix loading report ANSI styles for non-TTY output",
+    "updated_at": "2026-03-11T06:29:31Z"
+  },
+  {
+    "additions": 62,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? As per title",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44081",
-    "created_at": "2026-02-17T09:54:01Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44386",
+    "created_at": "2026-03-02T10:43:13Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44081/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44081",
+    "files_url": "https://github.com/huggingface/transformers/pull/44386/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44386",
     "labels": [],
     "merged": true,
-    "number": 44081,
-    "review_comments_count": 1,
+    "number": 44386,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "add default flash impl",
-    "updated_at": "2026-02-19T11:29:54Z"
+    "title": "[higgs-audio-v2] fix sampling",
+    "updated_at": "2026-03-02T13:06:23Z"
   },
   {
-    "additions": 22,
-    "author": "tomaarsen",
+    "additions": 8,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? I wasn't able to run make check-repo locally successfully, unless the following 3 fixes were applied.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44080",
-    "created_at": "2026-02-17T09:53:36Z",
-    "deletions": 8,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44385",
+    "created_at": "2026-03-02T09:45:15Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44080/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44080",
+    "files_url": "https://github.com/huggingface/transformers/pull/44385/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44385",
+    "labels": [],
+    "merged": false,
+    "number": 44385,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Fix make check-repo",
+    "updated_at": "2026-03-02T09:54:23Z"
+  },
+  {
+    "additions": 4,
+    "author": "JJJYmmm",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fix the attr `_no_split_modules` of `Qwen3_5Model` and `Qwen3_5MoeModel`, which affect the FSDP init of hf Trainer.",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44382",
+    "created_at": "2026-03-02T05:42:48Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44382/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44382",
     "labels": [],
     "merged": true,
-    "number": 44080,
+    "number": 44382,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`",
-    "updated_at": "2026-02-20T10:08:38Z"
+    "title": "[Bugfix] fix qwen3.5 no split module",
+    "updated_at": "2026-03-02T16:17:22Z"
   },
   {
-    "additions": 19,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026",
-    "changed_files": 10,
+    "additions": 2,
+    "author": "carcel-yu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? This PR adds MLU support to `is_torch_bf16_gpu_available()` by checking `torch.mlu.is_bf16_supported()` when an MLU device is available. ### Why is this needed? MLU devices support bf16 training, but they are curr\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44078",
-    "created_at": "2026-02-17T09:15:34Z",
-    "deletions": 1,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44381",
+    "created_at": "2026-03-02T05:34:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44078/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44078",
+    "files_url": "https://github.com/huggingface/transformers/pull/44381/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44381",
     "labels": [],
     "merged": true,
-    "number": 44078,
-    "review_comments_count": 6,
+    "number": 44381,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Set input_modalities on various architectures that aren't just text",
-    "updated_at": "2026-02-24T10:39:31Z"
+    "title": "Add MLU bf16 support to is_torch_bf16_gpu_available",
+    "updated_at": "2026-03-06T14:34:30Z"
   },
   {
-    "additions": 11,
-    "author": "mmahjoub5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
-    "changed_files": 1,
+    "additions": 579,
+    "author": "remi-or",
+    "author_association": "MEMBER",
+    "body_excerpt": "# Summary ! This PR is in draft, waiting for https://github.com/huggingface/transformers/pull/44227 to be merged This PR adds support for the `flash_attention_with_kvcache` kernel in continuoys batching. This is very efficient for decode-o\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44076",
-    "created_at": "2026-02-17T08:46:55Z",
-    "deletions": 62,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44379",
+    "created_at": "2026-03-01T23:13:17Z",
+    "deletions": 235,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44076/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44076",
+    "files_url": "https://github.com/huggingface/transformers/pull/44379/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44379",
     "labels": [],
-    "merged": false,
-    "number": 44076,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs",
-    "updated_at": "2026-02-18T04:11:40Z"
+    "merged": true,
+    "number": 44379,
+    "review_comments_count": 19,
+    "state": "closed",
+    "title": "[CB] Add paged_attention kernel",
+    "updated_at": "2026-03-09T22:16:31Z"
   },
   {
-    "additions": 66,
-    "author": "aman-coder03",
+    "additions": 1,
+    "author": "redpanda1995",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44074",
-    "created_at": "2026-02-17T08:23:25Z",
-    "deletions": 52,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44378",
+    "created_at": "2026-03-01T22:57:50Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44074/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44074",
+    "files_url": "https://github.com/huggingface/transformers/pull/44378/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44378",
     "labels": [],
     "merged": false,
-    "number": 44074,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[TextNet] Refactor output tracing using capture_outputs decorator",
-    "updated_at": "2026-02-17T11:28:11Z"
+    "number": 44378,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix logging with each layer with ms-swift lora fine-tuning ",
+    "updated_at": "2026-03-02T14:18:22Z"
   },
   {
-    "additions": 32,
-    "author": "aman-coder03",
+    "additions": 175,
+    "author": "redpanda1995",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026",
+    "body_excerpt": "# What does this PR do? Fixes TODO: Implement proper TP support for compressed tensors quantization <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release not\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44073",
-    "created_at": "2026-02-17T08:16:59Z",
-    "deletions": 38,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44377",
+    "created_at": "2026-03-01T22:46:07Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44073/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44073",
+    "files_url": "https://github.com/huggingface/transformers/pull/44377/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44377",
     "labels": [],
     "merged": false,
-    "number": 44073,
+    "number": 44377,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators",
-    "updated_at": "2026-02-17T11:29:01Z"
+    "state": "closed",
+    "title": "Implement Tensor Parallelism (TP) support for compressed tensors quantization",
+    "updated_at": "2026-03-02T14:15:47Z"
   },
   {
-    "additions": 12,
-    "author": "Siddhartha7340",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
+    "additions": 4,
+    "author": "stuckvgn",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary The quick-start code examples in `flan-t5.md` and `flan-ul2.md` use `\"A step by step recipe to make bolognese pasta:\"` as the demo prompt, with output that includes `ground beef`. This PR replaces it with `\"A step by step recipe\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44072",
-    "created_at": "2026-02-17T07:42:01Z",
-    "deletions": 38,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44376",
+    "created_at": "2026-03-01T17:41:48Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44072/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44072",
+    "files_url": "https://github.com/huggingface/transformers/pull/44376/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44376",
     "labels": [],
     "merged": false,
-    "number": 44072,
+    "number": 44376,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026",
-    "updated_at": "2026-02-17T07:56:05Z"
+    "state": "closed",
+    "title": "docs: update Flan-T5 and Flan-UL2 example to use plant-based recipe prompt",
+    "updated_at": "2026-03-14T06:47:36Z"
   },
   {
-    "additions": 38,
-    "author": "ArivunidhiA",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026",
-    "changed_files": 1,
+    "additions": 6829,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds RF-DETR using Codex 5.3. It did everything: modular (in 600 lines of code), fast and slow image processors, conversion script with bells and whistles (setting `id2label` etc.) To do: - [x] verify loss c\u2026",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44071",
-    "created_at": "2026-02-17T07:19:17Z",
-    "deletions": 112,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44375",
+    "created_at": "2026-03-01T17:32:17Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44071/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44071",
+    "files_url": "https://github.com/huggingface/transformers/pull/44375/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44375",
     "labels": [],
     "merged": false,
-    "number": 44071,
+    "number": 44375,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[Refactor] Migrate MPT to standardized output tracing decorators",
-    "updated_at": "2026-02-17T07:20:17Z"
+    "title": "Add RF-DETR",
+    "updated_at": "2026-03-05T16:00:53Z"
   },
   {
-    "additions": 272,
-    "author": "rudybear",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026",
-    "changed_files": 3,
+    "additions": 4,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #44373 The `position_ids` parameter docstrings in `_get_unpad_data()` (line 360) and `_upad_input()` (line 413) in `src/transformers/modeling_flash_attention_utils.py` were incorrectly describing `attention_m\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44070",
-    "created_at": "2026-02-17T07:18:13Z",
-    "deletions": 0,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44374",
+    "created_at": "2026-03-01T17:07:35Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44070/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44070",
+    "files_url": "https://github.com/huggingface/transformers/pull/44374/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44374",
     "labels": [],
     "merged": false,
-    "number": 44070,
+    "number": 44374,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture",
-    "updated_at": "2026-02-17T07:21:26Z"
+    "state": "closed",
+    "title": "Fix incorrect position_ids docstring in modeling_flash_attention_utils.py",
+    "updated_at": "2026-03-02T14:06:30Z"
   },
   {
-    "additions": 26,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.",
-    "changed_files": 26,
+    "additions": 5,
+    "author": "N3u0ns",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44355 `inspect.getsource()` fails with `TypeError` when called on Cython-compiled functions or built-in functions that don't have Python source code. This adds a try/except block to gracefully handle this case by returnin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44069",
-    "created_at": "2026-02-17T06:49:38Z",
-    "deletions": 48,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44372",
+    "created_at": "2026-03-01T13:53:58Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44069/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44069",
-    "labels": [],
-    "merged": true,
-    "number": 44069,
+    "files_url": "https://github.com/huggingface/transformers/pull/44372/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44372",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44372,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Use torch.isfinite",
-    "updated_at": "2026-02-18T01:04:19Z"
+    "title": "fix: handle Cython-compiled functions in get_docstring_indentation_level",
+    "updated_at": "2026-03-02T13:39:50Z"
   },
   {
-    "additions": 42,
-    "author": "mtthw13",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026",
-    "changed_files": 2,
+    "additions": 12,
+    "author": "leaderofARS",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# Fix documentation inconsistencies in integrations folder ## Description This PR addresses documentation errors and inconsistencies across the integrations module, specifically clarifying terminology and deprecation status in two key inte\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44068",
-    "created_at": "2026-02-17T06:13:37Z",
-    "deletions": 119,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44369",
+    "created_at": "2026-03-01T07:34:43Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44068/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44068",
+    "files_url": "https://github.com/huggingface/transformers/pull/44369/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44369",
     "labels": [],
     "merged": false,
-    "number": 44068,
-    "review_comments_count": 0,
+    "number": 44369,
+    "review_comments_count": 7,
     "state": "open",
-    "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators",
-    "updated_at": "2026-02-18T08:30:32Z"
+    "title": "Feature/integrations docs fix",
+    "updated_at": "2026-03-06T19:47:39Z"
   },
   {
-    "additions": 63,
-    "author": "23atharvaS",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026",
+    "additions": 171,
+    "author": "jayakumarpujar",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #43701 Models with `_checkpoint_conversion_mapping` (e.g. VLMs like Qwen2.5VL, LLaVA, ColPali, etc.) use a key renaming system: - **Loading** (`from_pretrained`): Checkpoint keys are renamed from original format \u2192 model fo\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44067",
-    "created_at": "2026-02-17T05:25:26Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44366",
+    "created_at": "2026-03-01T03:43:16Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44067/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44067",
+    "files_url": "https://github.com/huggingface/transformers/pull/44366/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44366",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44067,
+    "number": 44366,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add `eval_on_end` argument to Trainer for final evaluation after training",
-    "updated_at": "2026-02-17T13:32:34Z"
+    "title": "Fix resume_from_checkpoint key mismatch for models with _checkpoint_conversion_mapping",
+    "updated_at": "2026-03-02T14:02:22Z"
   },
   {
-    "additions": 35,
-    "author": "Jay-IIT",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026",
+    "additions": 8,
+    "author": "jayakumarpujar",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes #44360 - The reference DeepSeek-V3.2 `fp8_index` kernel applies **ReLU** to per-head q\u00b7k scores before weighting and summing across heads: ``` logits[i3_n, i_h] = T.max(logits[i3_n, i_h], 0) * q_s_frag[i_h] ``` [Referenc\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44364",
+    "created_at": "2026-03-01T02:19:14Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44364/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44364",
+    "labels": [
+      "Code agent slop"
     ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44066",
-    "created_at": "2026-02-17T05:12:11Z",
-    "deletions": 107,
+    "merged": false,
+    "number": 44364,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add missing ReLU in GlmMoeDsaIndexer scoring",
+    "updated_at": "2026-03-02T13:55:48Z"
+  },
+  {
+    "additions": 57,
+    "author": "jayakumarpujar",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Fixes #44327 - `decode_spans()` in the QA pipeline crashes with `ValueError: kth(=N) out of bounds (N)` when `len(scores_flat) == topk` (e.g., `top_k=100` with `seq_len=10`, since `10\u00b2 = 100`) - Root cause: `np.argpartition(ar\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44363",
+    "created_at": "2026-03-01T01:47:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44066/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44066",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44363/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44363",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44066,
+    "number": 44363,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor GPT-J to use standardized output tracing (#43979)",
-    "updated_at": "2026-02-18T18:44:28Z"
+    "state": "closed",
+    "title": "Fix off-by-one in decode_spans causing ValueError with np.argpartition",
+    "updated_at": "2026-03-02T13:03:02Z"
   },
   {
-    "additions": 21,
-    "author": "tysoncung",
+    "additions": 4,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026",
+    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 [MIGRATION_GUIDE_V5.md](https://github.com/harshaljanjani/transformers/blob/main/MIGRATION_GUIDE_V5.md) states that v5 renamed `additional_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44065",
-    "created_at": "2026-02-17T02:03:57Z",
-    "deletions": 76,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44362",
+    "created_at": "2026-02-28T20:04:05Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44065/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44065",
+    "files_url": "https://github.com/huggingface/transformers/pull/44362/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44362",
     "labels": [],
-    "merged": false,
-    "number": 44065,
+    "merged": true,
+    "number": 44362,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple",
-    "updated_at": "2026-02-25T00:49:18Z"
+    "title": "fix(tokenizer): Fix MLukeTokenizer AttributeError post-v5 refactor",
+    "updated_at": "2026-03-02T14:51:18Z"
   },
   {
-    "additions": 57,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026",
+    "additions": 341,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds a workaround for the PyTorch MPS `sdpa_vector_2pass_mps` correctness bug ([pytorch/pytorch#174861](https://github.com/pytorch/pytorch/issues/174861)). **The problem:** On Apple Silicon with MPS backend, `F.scal\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44064",
-    "created_at": "2026-02-17T01:10:31Z",
-    "deletions": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44359",
+    "created_at": "2026-02-28T17:47:01Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44064/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44064",
+    "files_url": "https://github.com/huggingface/transformers/pull/44359/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44359",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44064,
+    "number": 44359,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: implement eval_on_end to trigger evaluation after training",
-    "updated_at": "2026-02-17T13:32:40Z"
+    "title": "fix(sdpa): add workaround for MPS sdpa_vector_2pass_mps correctness bug",
+    "updated_at": "2026-03-02T13:54:58Z"
   },
   {
-    "additions": 229,
-    "author": "AutumnAurelium",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026",
-    "changed_files": 5,
+    "additions": 6,
+    "author": "hardikmeisheri",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary - `ShieldGemma2ForImageClassification` was missing `_tied_weights_keys`, so `model.lm_head.weight` was randomly re-initialized on every `from_pretrained` call instead of being tied to `embed_tokens.weight`. - This caused non-det\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44063",
-    "created_at": "2026-02-17T01:07:13Z",
-    "deletions": 150,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44358",
+    "created_at": "2026-02-28T16:49:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44063/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44063",
+    "files_url": "https://github.com/huggingface/transformers/pull/44358/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44358",
     "labels": [],
     "merged": true,
-    "number": 44063,
-    "review_comments_count": 6,
+    "number": 44358,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Update AFMoE architecture to use v5-style MoE impl",
-    "updated_at": "2026-03-19T14:00:46Z"
+    "title": "Fix ShieldGemma2 non-reproducible outputs by adding _tied_weights_keys",
+    "updated_at": "2026-03-16T20:02:09Z"
   },
   {
-    "additions": 2,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026",
-    "changed_files": 1,
+    "additions": 482,
+    "author": "NabilMch",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 127,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44061",
-    "created_at": "2026-02-16T22:08:48Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44357",
+    "created_at": "2026-02-28T15:11:37Z",
+    "deletions": 489,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44061/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44061",
-    "labels": [],
-    "merged": true,
-    "number": 44061,
+    "files_url": "https://github.com/huggingface/transformers/pull/44357/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44357",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44357,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: flaky `Kosmos2ModelTest` test",
-    "updated_at": "2026-02-18T14:23:30Z"
+    "title": "Fix RoPE inv_freq default initialization (Issue #39753)",
+    "updated_at": "2026-03-02T13:50:00Z"
   },
   {
-    "additions": 44,
-    "author": "lakprigan",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary Migrates GPT2 to the standardized output collection interface as part of #43979. - Added `_can_record_outputs` to `GPT2PreTrainedModel` (including `cross_attentions` via `OutputRecorder` targeting the `crossattention` submodule)\u2026",
+    "additions": 6,
+    "author": "iamaber",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Checks if model is already in target dtype before casting to avoid redundant copies that cause 25% performance degradation with `--fp16_full_eval`. ## Changes - Added dtype check before casting model to fp16/bf16 in `evaluation_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44059",
-    "created_at": "2026-02-16T20:14:30Z",
-    "deletions": 133,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44356",
+    "created_at": "2026-02-28T14:24:32Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44059/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44059",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44356/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44356",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44059,
+    "number": 44356,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[GPT2] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
-    "updated_at": "2026-02-25T17:47:45Z"
+    "state": "closed",
+    "title": "fix: avoid redundant fp16/bf16 model casts in evaluation_loop",
+    "updated_at": "2026-03-02T13:59:38Z"
   },
   {
-    "additions": 122,
-    "author": "engmohamedsalah",
+    "additions": 73,
+    "author": "sxu75374",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Fixes #44052 \u2014 resolves 10 of 11 skipped tests for the `glm_moe_dsa` model. **Root causes fixed:** - **DSA indexer mask shape mismatch**: The attention mask was not properly normalized to 4D before being passed to the indexer an\u2026",
-    "changed_files": 3,
+    "body_excerpt": "## What does this PR do? Fixes an off-by-one error in `decode_spans()` where `np.argpartition` is called with `kth == len(arr)` when `topk` equals the number of candidate scores. This raises `ValueError: kth(=N) out of bounds (N)`. **Root\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44058",
-    "created_at": "2026-02-16T19:24:30Z",
-    "deletions": 84,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44354",
+    "created_at": "2026-02-28T08:46:39Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44058/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44058",
+    "files_url": "https://github.com/huggingface/transformers/pull/44354/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44354",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 44058,
+    "number": 44354,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix skipped tests for glm_moe_dsa model",
-    "updated_at": "2026-02-17T17:23:03Z"
+    "title": "fix: off-by-one in decode_spans causes ValueError when topk == len(scores)",
+    "updated_at": "2026-03-02T13:02:38Z"
   },
   {
-    "additions": 0,
-    "author": "mariam851",
+    "additions": 50,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR addresses memory efficiency issues in the Qwen2Moe implementation (reported in #43856). Users experienced Out-of-Memory (OOM) errors during quantization and inference, particularly with large reserved memory (e.g., 27GB on H100) th\u2026",
-    "changed_files": 0,
+    "body_excerpt": "@IlyasMoutawwakil , pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44353",
+    "created_at": "2026-02-28T07:50:37Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44353/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44353",
+    "labels": [],
+    "merged": true,
+    "number": 44353,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "add expectations for xpu for olmo_hybrid model",
+    "updated_at": "2026-04-02T03:22:06Z"
+  },
+  {
+    "additions": 13,
+    "author": "giulio-leone",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44336 The `loading_report` module emitted **bold/italic ANSI escape codes** even when `stdout` was not connected to a terminal (e.g. piped or redirected output). While `_color()` already gated color codes behind `sys.stdo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44057",
-    "created_at": "2026-02-16T18:35:01Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44352",
+    "created_at": "2026-02-28T06:22:19Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44057/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44057",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44352/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44352",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44057,
+    "number": 44352,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(qwen3_moe): optimize memory and fix OOM in MoE layers",
-    "updated_at": "2026-02-16T21:47:41Z"
+    "title": "fix: suppress ANSI escape codes when stdout is not a terminal",
+    "updated_at": "2026-03-02T13:59:15Z"
   },
   {
-    "additions": 50,
-    "author": "aman-coder03",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR migrates MPNet to the new standardized output tracing system using the `@capture_outputs` decorator. Specifically, this PR: - Applies `@capture_outputs` to `MPNetModel.forward` - Removes manual accumulation\u2026",
-    "changed_files": 1,
+    "additions": 7,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": null,
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44056",
-    "created_at": "2026-02-16T18:27:05Z",
-    "deletions": 14,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44350",
+    "created_at": "2026-02-28T03:20:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44056/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44056",
+    "files_url": "https://github.com/huggingface/transformers/pull/44350/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44350",
     "labels": [],
-    "merged": false,
-    "number": 44056,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[MPNet] Refactor output tracing using capture_outputs decorator",
-    "updated_at": "2026-02-17T11:23:12Z"
+    "merged": true,
+    "number": 44350,
+    "review_comments_count": 6,
+    "state": "closed",
+    "title": "skip 1 invalid test case for higgs_audio_v2",
+    "updated_at": "2026-03-05T11:08:09Z"
   },
   {
-    "additions": 5,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
+    "additions": 49,
+    "author": "zzc0430",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? > Inspired by https://github.com/huggingface/transformers/pull/44347#issuecomment-3976028358 Fixes `transformers serve` failing with hybrid models like Qwen3.5 that use `linear_attention` layers. Two issues are addr\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44055",
-    "created_at": "2026-02-16T18:26:43Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44349",
+    "created_at": "2026-02-28T03:09:30Z",
     "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44055/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44055",
+    "files_url": "https://github.com/huggingface/transformers/pull/44349/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44349",
     "labels": [],
-    "merged": true,
-    "number": 44055,
+    "merged": false,
+    "number": 44349,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix unprotected torch import",
-    "updated_at": "2026-02-16T18:43:01Z"
+    "title": "fix: support linear_attention in continuous batching and fix serve ch\u2026",
+    "updated_at": "2026-03-02T13:48:04Z"
   },
   {
-    "additions": 346,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add flash MLA interface. - It does not work I get a segfault - we don't leverage the paged cache so it's not as efficient as that I reckon. ```bash Fetching 6 files: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2026",
-    "changed_files": 10,
+    "additions": 341,
+    "author": "n0kovo",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Most quantized models for Apple Silicon on the Hub are in MLX format. The `MetalConfig` quantization backend supports on-the-fly quantization of standard checkpoints but cannot load pre-quantized MLX models. This PR fixes the fi\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44054",
-    "created_at": "2026-02-16T18:07:14Z",
-    "deletions": 93,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44054/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44054",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44348",
+    "created_at": "2026-02-28T00:24:32Z",
+    "deletions": 32,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44348/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44348",
     "labels": [],
     "merged": false,
-    "number": 44054,
+    "number": 44348,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Flash mla interface",
-    "updated_at": "2026-02-20T11:14:39Z"
+    "title": "Enable MetalConfig to load pre-quantized MLX models from HuggingFace Hub",
+    "updated_at": "2026-03-02T17:18:46Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
-    "cluster_id": "cluster-44053-8",
-    "cluster_ids": [
-      "cluster-44053-8"
-    ],
-    "cluster_role": "member",
+    "additions": 49,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes `AttributeError` when using continuous batching with composite model configs (e.g. `Qwen3_5Config` for vision-language models). Composite configs store attributes like `num_attention_heads` and `num_key_value\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44053",
-    "created_at": "2026-02-16T17:59:48Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44347",
+    "created_at": "2026-02-27T22:48:49Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44053/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44053",
-    "labels": [],
-    "merged": true,
-    "number": 44053,
+    "files_url": "https://github.com/huggingface/transformers/pull/44347/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44347",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44347,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix peft conversion typo",
-    "updated_at": "2026-02-17T11:12:19Z"
+    "title": "fix: resolve composite config in PagedAttentionCache and group_layers_by_attn_type",
+    "updated_at": "2026-03-02T13:41:23Z"
   },
   {
-    "additions": 2,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Fix incorrect forward type hint for Gemma3n ## Details The type hint didn't match the actual returned class: https://github.com/huggingface/transformers/blob/349e00c1a367ce263624e525038250625dcf20c7/src/transforme\u2026",
+    "additions": 4,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes the `q_a_layernorm` and `kv_a_layernorm` in DeepSeek V2/V3 MLA attention to explicitly receive `config.rms_norm_eps` instead of falling back to the RMSNorm class default (`1e-6`). **The problem:** All other RM\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44051",
-    "created_at": "2026-02-16T17:26:24Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44346",
+    "created_at": "2026-02-27T21:47:45Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44051/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44051",
-    "labels": [],
-    "merged": true,
-    "number": 44051,
+    "files_url": "https://github.com/huggingface/transformers/pull/44346/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44346",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44346,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`chore`] Fix incorrect forward type hint for Gemma3n",
-    "updated_at": "2026-02-20T09:08:07Z"
+    "title": "fix(deepseek): pass config.rms_norm_eps to MLA q/kv layernorms",
+    "updated_at": "2026-03-02T13:26:21Z"
   },
   {
-    "additions": 15,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Patch `get_text_features` for ChineseCLIP ### Details The `get_text_features` assumes that the `text_model` returns a `BaseModelOutputWithPooling`, just like is done with many other models. Currently, the `get_tex\u2026",
-    "changed_files": 7,
+    "additions": 13,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44303 When redirecting `from_pretrained` output to a log file (e.g. in CI), the \"Loading weights\" tqdm bar was updating its postfix with `Materializing param=...` on every parameter, producing huge log files. ## Change -\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44050",
-    "created_at": "2026-02-16T17:23:31Z",
-    "deletions": 19,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44345",
+    "created_at": "2026-02-27T21:05:22Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44050/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44050",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44345/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44345",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44050,
-    "review_comments_count": 4,
+    "number": 44345,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Patch `get_text_features` for ChineseCLIP",
-    "updated_at": "2026-02-17T09:55:17Z"
+    "title": "Less verbose weight-loading tqdm when stdout is not a TTY (fixes #44303)",
+    "updated_at": "2026-03-02T13:49:11Z"
   },
   {
-    "additions": 59,
-    "author": "ManasVardhan",
+    "additions": 6,
+    "author": "manavshrivastavagit",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `fnet` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of #43979. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": FNetLayer}` to `FNetPreTrainedModel`\u2026",
+    "body_excerpt": "## Summary Fixes #44297 Qwen3.5 models on the Hub (e.g. [Qwen/Qwen3.5-27B](https://huggingface.co/Qwen/Qwen3.5-27B)) use `\"tokenizer_class\": \"Qwen2Tokenizer\"` in `tokenizer_config.json`, but `TOKENIZER_MAPPING_NAMES` had `qwen3_5` \u2192 `\"Qwen\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44049",
-    "created_at": "2026-02-16T17:19:04Z",
-    "deletions": 112,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44344",
+    "created_at": "2026-02-27T21:04:27Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44049/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44049",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44344/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44344",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44049,
+    "number": 44344,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor fnet model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:13Z"
+    "title": "Fix tokenizer_class in tokenizer_config.json for Qwen3.5 save_pretrained (fixes #44297)",
+    "updated_at": "2026-03-02T13:17:41Z"
   },
   {
-    "additions": 4,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Fix up `__repr__` whitespace/brackets ## Reproducer ```python from transformers import AutoTokenizer, PreTrainedTokenizerBase # __repr__ via PreTrainedTokenizerBase tokenizer = AutoTokenizer.from_pretrained(\"bert-\u2026",
+    "additions": 16,
+    "author": "manavshrivastavagit",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44336 `utils/loading_report.py` was emitting ANSI codes for **bold** and *italic* via `PALETTE['bold']` and `PALETTE['italic']` without checking if stdout is connected to a terminal. `_color()` already respects `sys.stdou\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44048",
-    "created_at": "2026-02-16T17:18:10Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44343",
+    "created_at": "2026-02-27T20:58:33Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44048/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44048",
-    "labels": [],
-    "merged": true,
-    "number": 44048,
+    "files_url": "https://github.com/huggingface/transformers/pull/44343/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44343",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44343,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`simple`] Fix up `__repr__` whitespace/brackets",
-    "updated_at": "2026-02-20T10:03:34Z"
+    "title": "Fix ANSI codes in loading_report when stdout is not a TTY (fixes #44336)",
+    "updated_at": "2026-03-02T13:44:43Z"
   },
   {
-    "additions": 35,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `bloom` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of the effort in #43979. ### Changes: - Add `_can_record_outputs` dict to `BloomPreTrainedModel` mapping `hi\u2026",
-    "changed_files": 1,
+    "additions": 383,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- created a new performance section divided into memory and speed optimizations - model memory training anatomy [guide](https://huggingface.co/docs/transformers/main/en/model_memory_anatomy) is now the more descriptive and simplified GPU m\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44047",
-    "created_at": "2026-02-16T17:15:25Z",
-    "deletions": 104,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44342",
+    "created_at": "2026-02-27T20:10:49Z",
+    "deletions": 273,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44047/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44047",
+    "files_url": "https://github.com/huggingface/transformers/pull/44342/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44342",
     "labels": [],
     "merged": false,
-    "number": 44047,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor bloom model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:14Z"
+    "number": 44342,
+    "review_comments_count": 12,
+    "state": "open",
+    "title": "[docs] training performance",
+    "updated_at": "2026-03-16T20:24:33Z"
   },
   {
-    "additions": 24,
-    "author": "ManasVardhan",
+    "additions": 12,
+    "author": "Kokonico",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "body_excerpt": "Fixes #44336 ## Changes * Added a new `_palette` function to return the ANSI code for a given color or format only if `sys.stdout` is interactive. (`src/transformers/utils/loading_report.py`) * Updated all usages of `PALETTE[<format>]` in\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44046",
-    "created_at": "2026-02-16T17:07:38Z",
-    "deletions": 70,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44341",
+    "created_at": "2026-02-27T19:30:30Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44046/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44046",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44341/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44341",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44046,
-    "review_comments_count": 0,
+    "number": 44341,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-02-17T14:15:23Z"
+    "title": "Fix and optimize ANSI color handling in loading report for interactive terminals",
+    "updated_at": "2026-03-02T18:16:00Z"
   },
   {
-    "additions": 456215,
-    "author": "ArthurZucker",
+    "additions": 33,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 4939,
+    "body_excerpt": "Some speculative tests seem flaky with SDPA but reliable with `eager` attention. In local testing, `test_speculative_decoding_equals_regular_decoding` fails 5-10% of the time without this change. and I also saw CI failures. Failures are re\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44045",
-    "created_at": "2026-02-16T17:01:41Z",
-    "deletions": 591028,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/44045/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44045",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44340",
+    "created_at": "2026-02-27T18:09:09Z",
+    "deletions": 27,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44340/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44340",
     "labels": [],
-    "merged": false,
-    "number": 44045,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44340,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Flash-mla-interface",
-    "updated_at": "2026-02-16T17:11:51Z"
+    "title": "Fix speculative tests that are flaky with SDPA",
+    "updated_at": "2026-03-02T17:18:27Z"
   },
   {
-    "additions": 49,
-    "author": "rwtarpit",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44044",
-    "created_at": "2026-02-16T16:43:19Z",
-    "deletions": 112,
+    "additions": 6221,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? \u2192 This PR adds **DEIMv2** to Transformers! \u2192 **IMP:** I've linked two notebooks: a [Colab notebook here](https://colab.research.google.com/drive/1jCNefxrKiHWdBEIYTcU3jsd9xyWDwIxC?usp=sharing) demonstrating the fun\u2026",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44339",
+    "created_at": "2026-02-27T18:08:53Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44044/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44044",
+    "files_url": "https://github.com/huggingface/transformers/pull/44339/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44339",
     "labels": [],
     "merged": false,
-    "number": 44044,
-    "review_comments_count": 0,
+    "number": 44339,
+    "review_comments_count": 199,
     "state": "open",
-    "title": "Refactor DeBERTa's output tracing interface",
-    "updated_at": "2026-02-16T18:57:29Z"
+    "title": "model: Add DEIMv2 to Transformers",
+    "updated_at": "2026-04-02T19:53:45Z"
   },
   {
-    "additions": 170,
-    "author": "IlyasMoutawwakil",
+    "additions": 3641,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 31,
+    "body_excerpt": "# What does this PR do? This PR rework a bit how distributed tests are tested. I tried to keep some of the existing tests and added new tests also. For each of these distributed methods (ddp, fsdp, deepspeeed), we have some common tests li\u2026",
+    "changed_files": 38,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44043",
-    "created_at": "2026-02-16T16:23:57Z",
-    "deletions": 162,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44338",
+    "created_at": "2026-02-27T17:50:16Z",
+    "deletions": 3762,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44043/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44043",
+    "files_url": "https://github.com/huggingface/transformers/pull/44338/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44338",
     "labels": [],
     "merged": true,
-    "number": 44043,
-    "review_comments_count": 15,
+    "number": 44338,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "`grouped_mm` fallback",
-    "updated_at": "2026-02-23T13:58:09Z"
+    "title": "Update distributed tests",
+    "updated_at": "2026-03-05T23:35:36Z"
   },
   {
-    "additions": 1,
-    "author": "Rocketknight1",
+    "additions": 2,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "Joao is regrettably no longer with us :saluting_face: so we should really stop getting users to ping him! This PR makes @cyrilvallez responsible for `generate` issues outside of VLMs.",
-    "changed_files": 1,
+    "body_excerpt": "- moves `kernels-community/flash-attn2:FlashAttention2` to `from_pretrained(attn_implementation...)` - fix error message for registering a kernel",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44042",
-    "created_at": "2026-02-16T16:00:36Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44337",
+    "created_at": "2026-02-27T17:36:54Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44042/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44042",
+    "files_url": "https://github.com/huggingface/transformers/pull/44337/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44337",
     "labels": [],
     "merged": true,
-    "number": 44042,
+    "number": 44337,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update assignee for generate in bug report template",
-    "updated_at": "2026-02-16T16:09:19Z"
+    "title": "[docs] kernelconfig fix",
+    "updated_at": "2026-02-27T22:46:30Z"
   },
   {
-    "additions": 469,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? Alternate PR to #43985 to be a reorder only PR. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, s\u2026",
-    "changed_files": 4,
+    "additions": 57,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the RoFormer model to use the `@capture_outputs` and `@can_return_tuple` decorators, following the established pattern (similar to #44047 for Bloom, #44151 for BioGPT, etc.). ### Changes: - **`RoFormerMod\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44041",
-    "created_at": "2026-02-16T15:40:41Z",
-    "deletions": 457,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44335",
+    "created_at": "2026-02-27T17:23:01Z",
+    "deletions": 172,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44041/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44041",
+    "files_url": "https://github.com/huggingface/transformers/pull/44335/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44335",
     "labels": [],
-    "merged": true,
-    "number": 44041,
-    "review_comments_count": 14,
+    "merged": false,
+    "number": 44335,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor _inner_training_loop to smaller methods",
-    "updated_at": "2026-02-23T16:52:09Z"
+    "title": "Refactor RoFormer output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:29:59Z"
   },
   {
-    "additions": 366,
-    "author": "zucchini-nlp",
+    "additions": 1,
+    "author": "NielsRogge",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44008 and re-enables tests",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? I had some issues with running `transformers-cli add-new-model-like`. This PR fixes it. Fixes #44661.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44040",
-    "created_at": "2026-02-16T12:43:28Z",
-    "deletions": 230,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44334",
+    "created_at": "2026-02-27T17:13:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44040/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44040",
+    "files_url": "https://github.com/huggingface/transformers/pull/44334/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44334",
     "labels": [],
     "merged": true,
-    "number": 44040,
-    "review_comments_count": 14,
+    "number": 44334,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix gemma3n `get_audio_features`",
-    "updated_at": "2026-02-19T12:50:00Z"
+    "title": "Fix CookieCutter",
+    "updated_at": "2026-03-13T17:13:28Z"
   },
   {
-    "additions": 47,
-    "author": "itzyesse99-lgtm",
+    "additions": 13,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "```diff diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index 1234567..8901234 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -10,6 +10,7 @@ from transformers import PreTrai\u2026",
+    "body_excerpt": "## What does this PR do? Refactors the ALBERT model to use named attribute access instead of index-based access on model outputs, and removes redundant `return_dict=True` arguments from inner model calls (already handled by `@capture_outpu\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44333",
+    "created_at": "2026-02-27T17:12:25Z",
+    "deletions": 18,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44333/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44333",
+    "labels": [
+      "Code agent slop"
     ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44039",
-    "created_at": "2026-02-16T12:01:26Z",
-    "deletions": 0,
+    "merged": false,
+    "number": 44333,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor ALBERT to use named attributes and remove redundant return_dict=True",
+    "updated_at": "2026-03-02T13:05:54Z"
+  },
+  {
+    "additions": 3,
+    "author": "tysoncung",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fix minor typos found in comments and docstrings: - `orignal` \u2192 `original` in `src/transformers/integrations/peft.py` (lines 245, 284) - Duplicate word `is is` \u2192 `is` in `src/transformers/models/dia/processing_dia.py` (line 89) Small clean\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44332",
+    "created_at": "2026-02-27T16:11:46Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44039/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44039",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 44039,
+    "files_url": "https://github.com/huggingface/transformers/pull/44332/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44332",
+    "labels": [],
+    "merged": true,
+    "number": 44332,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "AI Fix for #43979",
-    "updated_at": "2026-03-14T12:34:32Z"
+    "title": "Fix typos in comments and docstrings",
+    "updated_at": "2026-02-27T18:02:59Z"
   },
   {
-    "additions": 23,
-    "author": "Cyrilvallez",
+    "additions": 33,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/43913",
+    "body_excerpt": "# What does this PR do? fixed the bfloat16 dtype mismatch and Loss computation shape mismatch. Also added tests for these. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to ap\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44037",
-    "created_at": "2026-02-16T11:02:12Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44331",
+    "created_at": "2026-02-27T15:46:08Z",
     "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44037/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44037",
+    "files_url": "https://github.com/huggingface/transformers/pull/44331/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44331",
     "labels": [],
     "merged": true,
-    "number": 44037,
-    "review_comments_count": 3,
+    "number": 44331,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "Add a dim check mechanism in Transpose and fix qwen3_vl_moe weight mapping",
-    "updated_at": "2026-02-16T16:01:12Z"
+    "title": "[timesfm2_5] fix timesfm2.5 loss",
+    "updated_at": "2026-03-03T17:22:56Z"
   },
   {
-    "additions": 0,
-    "author": "ydshieh",
+    "additions": 289,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? So the following logic added in a previous PR #44033 could take effect ```python # `include_all` is `True` when the CI is running on a pull request, so it treats all failing tests # in the current CI run as \"new fai\u2026",
+    "body_excerpt": "# What does this PR do? As per the title! Follow-up of https://github.com/huggingface/transformers/pull/44181 with more models!",
+    "changed_files": 136,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44330",
+    "created_at": "2026-02-27T15:33:02Z",
+    "deletions": 1682,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44330/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44330",
+    "labels": [],
+    "merged": true,
+    "number": 44330,
+    "review_comments_count": 13,
+    "state": "closed",
+    "title": "Remove `cache_position` in more models",
+    "updated_at": "2026-03-11T14:47:50Z"
+  },
+  {
+    "additions": 3,
+    "author": "linfeng-du",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44036",
-    "created_at": "2026-02-16T10:14:54Z",
-    "deletions": 12,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44329",
+    "created_at": "2026-02-27T15:27:39Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44036/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44036",
+    "files_url": "https://github.com/huggingface/transformers/pull/44329/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44329",
     "labels": [],
     "merged": true,
-    "number": 44036,
-    "review_comments_count": 0,
+    "number": 44329,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Remove `other_workflow_run_ids` for `issue_comment` in `utils/notification_service.py`",
-    "updated_at": "2026-02-16T10:24:07Z"
+    "title": "Enable Liger Kernel when doing hyperparameter search.",
+    "updated_at": "2026-03-03T13:44:56Z"
   },
   {
-    "additions": 25,
-    "author": "ArthurZucker",
+    "additions": 92,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We already brought it back with: ```python if clean_up_tokenization_spaces: # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement) if hasattr(self, \"clean_up_tokenization\") and callab\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? This PR adds guidelines for agents when it comes to add/run trainer tests. This needs to be updated as we modify, refactor the code !",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44035",
-    "created_at": "2026-02-16T09:49:28Z",
-    "deletions": 112,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44328",
+    "created_at": "2026-02-27T15:17:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44035/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44035",
+    "files_url": "https://github.com/huggingface/transformers/pull/44328/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44328",
     "labels": [],
     "merged": true,
-    "number": 44035,
-    "review_comments_count": 0,
+    "number": 44328,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "bring back our demons: clean_up_tokenization_spaces",
-    "updated_at": "2026-02-20T14:50:18Z"
+    "title": "Add testing guide for agents for trainer tests",
+    "updated_at": "2026-02-27T17:32:11Z"
   },
   {
-    "additions": 18,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44034",
-    "created_at": "2026-02-16T08:04:20Z",
+    "additions": 38,
+    "author": "overcastbulb",
+    "author_association": "NONE",
+    "body_excerpt": "Adds missing pipeline tutorial example for zero-shot-classification following the existing format of other task examples in pipeline_tutorial.md. Related: #18926",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44326",
+    "created_at": "2026-02-27T14:37:19Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44034/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44034",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44326/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44326",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44034,
+    "number": 44326,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "don't merge check workflow",
-    "updated_at": "2026-02-16T10:52:50Z"
+    "title": "docs: Add zero-shot-classification example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:46:24Z"
   },
   {
-    "additions": 143,
-    "author": "ydshieh",
+    "additions": 4,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Improve new failing test analysis for PR comment CI",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? The `CLIPMLP` has the bias set to True but timesFM 2.5 uses `bias=False` in the pretrained model <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44033",
-    "created_at": "2026-02-16T07:30:33Z",
-    "deletions": 49,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44325",
+    "created_at": "2026-02-27T13:18:40Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44033/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44033",
+    "files_url": "https://github.com/huggingface/transformers/pull/44325/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44325",
     "labels": [],
     "merged": true,
-    "number": 44033,
+    "number": 44325,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve new failing test analysis for PR comment CI",
-    "updated_at": "2026-02-16T08:02:16Z"
+    "title": "[timesfm2_5] fix timesfm mlp bias",
+    "updated_at": "2026-02-27T13:36:13Z"
   },
   {
-    "additions": 3,
-    "author": "JJJYmmm",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update FP8 expert replacement to use `model.config.text_config` when available (VLMs), falling back to model.config if it's text-only models.",
-    "changed_files": 1,
+    "additions": 16,
+    "author": "tonglei19961121",
+    "author_association": "NONE",
+    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. This PR addresses the first TODO item in #18926. Changes: - Added document-question-answering task example to pipeline_tutori\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44032",
-    "created_at": "2026-02-16T06:02:28Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44324",
+    "created_at": "2026-02-27T12:29:06Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44032/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44032",
-    "labels": [],
-    "merged": true,
-    "number": 44032,
-    "review_comments_count": 2,
+    "files_url": "https://github.com/huggingface/transformers/pull/44324/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44324",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44324,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Misc][vlms] Use text_config when initializing the fine-grained FP8Expert",
-    "updated_at": "2026-02-19T10:28:31Z"
+    "title": "docs: Add document-question-answering example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:35:23Z"
   },
   {
-    "additions": 11,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`dpr` model as per #43979 cc @molbap <img width=\"853\" height=\"323\" alt=\"Screenshot 2026-02-16 at 9 13 30 AM\" src=\"https://github.com/user-attachments/assets/d658f1d0-75e8-4eac-8a12-9aeddf194dde\" />",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44030",
-    "created_at": "2026-02-16T03:44:19Z",
-    "deletions": 58,
+    "additions": 16,
+    "author": "tonglei19961121",
+    "author_association": "NONE",
+    "body_excerpt": "Add tutorial example for DocumentQuestionAnswering pipeline following the existing format of other task examples. Fixes #18926",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44323",
+    "created_at": "2026-02-27T12:26:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44030/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44030",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44323/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44323",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44030,
-    "review_comments_count": 1,
-    "state": "open",
-    "title": "refactor output tracing in `dpr`",
-    "updated_at": "2026-02-17T07:46:00Z"
+    "number": 44323,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "docs: Add document-question-answering example to pipeline tutorial",
+    "updated_at": "2026-02-27T14:38:43Z"
   },
   {
-    "additions": 21,
-    "author": "omkar-334",
+    "additions": 12,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`rwkv` model as per #43979 cc @molbap <img width=\"856\" height=\"333\" alt=\"Screenshot 2026-02-16 at 9 06 34 AM\" src=\"https://github.com/user-attachments/assets/9c8c5d41-ffbd-45f6-8b9b-1429bcb14543\" />",
+    "body_excerpt": "@vasqu This PR skipped 2 invalid test cases: ``` tests/models/voxtral_realtime/test_modeling_voxtral_realtime.py::VoxtralRealtimeForConditionalGenerationModelTest::test_generate_with_quant_cache tests/models/voxtral_realtime/test_modeling_\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
+    "cluster_id": "cluster-43324-21",
     "cluster_ids": [
-      "cluster-43998-11"
+      "cluster-43324-21"
     ],
     "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44029",
-    "created_at": "2026-02-16T03:37:13Z",
-    "deletions": 55,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44321",
+    "created_at": "2026-02-27T09:54:14Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44029/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44029",
+    "files_url": "https://github.com/huggingface/transformers/pull/44321/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44321",
     "labels": [],
-    "merged": false,
-    "number": 44029,
+    "merged": true,
+    "number": 44321,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `rwkv`",
-    "updated_at": "2026-02-17T07:47:02Z"
+    "state": "closed",
+    "title": "skip 2 invalid test cases for voxtral_realtime model",
+    "updated_at": "2026-04-02T03:22:04Z"
   },
   {
-    "additions": 13,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`superpoint` model as per #43979 cc @molbap <img width=\"857\" height=\"334\" alt=\"Screenshot 2026-02-16 at 8 53 43 AM\" src=\"https://github.com/user-attachments/assets/17781b76-743b-4b38-923a-8db3b94ccd01\" />",
-    "changed_files": 2,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44028",
-    "created_at": "2026-02-16T03:25:14Z",
-    "deletions": 46,
+    "additions": 4894,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds [SAM3-LiteText: An Anatomical Study of the SAM3 Text Encoder for Efficient Vision-Language Segmentation](https://huggingface.co/papers/2602.12173). Fixes #44205",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44320",
+    "created_at": "2026-02-27T08:29:00Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44028/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44028",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44320/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44320",
+    "labels": [
+      "New model"
+    ],
     "merged": false,
-    "number": 44028,
-    "review_comments_count": 0,
+    "number": 44320,
+    "review_comments_count": 28,
     "state": "open",
-    "title": "refactor output tracing for `superpoint`",
-    "updated_at": "2026-02-17T07:46:06Z"
+    "title": "Add SAM3-LiteText",
+    "updated_at": "2026-04-06T20:45:48Z"
   },
   {
-    "additions": 6,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `speech_encoder_decoder` model as per #43979 cc @molbap <img width=\"852\" height=\"335\" alt=\"Screenshot 2026-02-16 at 8 44 05 AM\" src=\"https://github.com/user-attachments/assets/ee25c72b-b995-403c-b47b-3e9cbae0d2cc\" />",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
+    "additions": 74,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44027",
-    "created_at": "2026-02-16T03:14:41Z",
-    "deletions": 22,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44319",
+    "created_at": "2026-02-27T08:20:45Z",
+    "deletions": 56,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44027/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44027",
+    "files_url": "https://github.com/huggingface/transformers/pull/44319/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44319",
     "labels": [],
-    "merged": false,
-    "number": 44027,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `speech_encoder_decoder`",
-    "updated_at": "2026-02-17T09:04:35Z"
+    "merged": true,
+    "number": 44319,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Support non-gated experts",
+    "updated_at": "2026-03-02T19:26:38Z"
   },
   {
-    "additions": 12,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`vision_encoder_decoder` model as per #43979 cc @molbap <img width=\"849\" height=\"333\" alt=\"Screenshot 2026-02-16 at 8 28 20 AM\" src=\"https://github.com/user-attachments/assets/9f511a17-947b-46ed-82a8-8bb9bb103f15\" />",
+    "additions": 10,
+    "author": "yoginlangalia",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Adds input validation for bounding box shape in `LayoutLMv3Tokenizer`. When users pass boxes with fewer (or more) than 4 values per box, the tokenizer now raises a clear `ValueError` instead of a confusing generic\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44026",
-    "created_at": "2026-02-16T02:59:14Z",
-    "deletions": 22,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44318",
+    "created_at": "2026-02-27T06:40:02Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44026/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44026",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44318/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44318",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44026,
+    "number": 44318,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `vision_encoder_decoder`",
-    "updated_at": "2026-02-17T09:05:22Z"
+    "state": "closed",
+    "title": "Validate bounding box shape in LayoutLMv3Tokenizer",
+    "updated_at": "2026-02-27T14:43:08Z"
   },
   {
-    "additions": 7,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `depth_anything` model as per #43979 cc @molbap <img width=\"840\" height=\"330\" alt=\"Screenshot 2026-02-16 at 8 25 01 AM\" src=\"https://github.com/user-attachments/assets/fe7770be-70cb-4343-accb-7407c6bbb4f8\" />",
+    "additions": 4,
+    "author": "sxu75374",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Passes `config.rms_norm_eps` explicitly to `q_a_layernorm` and `kv_a_layernorm` in both DeepSeek V2 and V3 MLA attention. Currently these two norms are constructed without `eps`, falling back to the `RMSNorm` class\u2026",
     "changed_files": 2,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44025",
-    "created_at": "2026-02-16T02:56:17Z",
-    "deletions": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44317",
+    "created_at": "2026-02-27T04:48:08Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44025/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44025",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44317/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44317",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44025,
+    "number": 44317,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `depth_anything`",
-    "updated_at": "2026-02-17T07:46:31Z"
+    "state": "closed",
+    "title": "fix(deepseek): pass rms_norm_eps to MLA q/kv layernorms",
+    "updated_at": "2026-02-27T14:30:04Z"
   },
   {
-    "additions": 15,
-    "author": "mmahjoub5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR refactors the FocalNet implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
+    "additions": 2,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #44303 The weight loading progress bar called `pbar.refresh()` on every single parameter, bypassing tqdm's built-in rate-limiting. When output is redirected to a log file (e.g. in CI), this produced one line per parameter -- hundreds\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44024",
-    "created_at": "2026-02-15T23:48:12Z",
-    "deletions": 60,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44316",
+    "created_at": "2026-02-27T03:08:28Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44024/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44024",
+    "files_url": "https://github.com/huggingface/transformers/pull/44316/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44316",
     "labels": [],
     "merged": false,
-    "number": 44024,
+    "number": 44316,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Focalnet standardized outputs",
-    "updated_at": "2026-02-17T08:47:48Z"
+    "state": "closed",
+    "title": "Reduce tqdm verbosity during weight loading",
+    "updated_at": "2026-03-03T17:02:34Z"
   },
   {
-    "additions": 32,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the Nystromformer model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Add `_can_record_outputs` on `Nystromform\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44023",
-    "created_at": "2026-02-15T21:53:48Z",
-    "deletions": 122,
+    "additions": 3615,
+    "author": "jp1924",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, Transformers team! I submitted a PR to add naver-hyperclovax/HyperCLOVAX-SEED-Think-32B (hereafter HCX), developed by the Korean IT company Naver while executing the government's national AI model project. Th\u2026",
+    "changed_files": 23,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 20,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44314",
+    "created_at": "2026-02-27T02:01:28Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44023/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44023",
+    "files_url": "https://github.com/huggingface/transformers/pull/44314/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44314",
     "labels": [],
     "merged": false,
-    "number": 44023,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor Nystromformer output tracing with @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:15Z"
+    "number": 44314,
+    "review_comments_count": 77,
+    "state": "open",
+    "title": "add HyperClovaX Vision",
+    "updated_at": "2026-04-07T08:25:07Z"
   },
   {
-    "additions": 57,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the ConvBERT model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Move `ConvBertPreTrainedModel` after layer def\u2026",
+    "additions": 4,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes the same `TypeError: AddedToken() got multiple values for keyword argument 'special'` that #44281 addressed, but for the `extra_special_tokens` code path which was missed. #44281 (commit 8e663c7) correctly added `value.pop(\"special\",\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44022",
-    "created_at": "2026-02-15T21:49:57Z",
-    "deletions": 152,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44313",
+    "created_at": "2026-02-27T01:37:45Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44022/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44022",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44313/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44313",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44022,
+    "number": 44313,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor ConvBERT output tracing with @capture_outputs and @can_return_tuple decorators",
-    "updated_at": "2026-03-03T00:30:17Z"
+    "title": "Fix AddedToken duplicate 'special' kwarg for extra_special_tokens",
+    "updated_at": "2026-02-27T14:26:28Z"
   },
   {
-    "additions": 22,
-    "author": "ManasVardhan",
+    "additions": 8,
+    "author": "haosenwang1018",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #43906 (related to #38071) ### Problem When using `pipeline('text-generation')` with batched inference on Qwen3 (and other models where `pad_token_id == bos_token_id`), a spurious warning is emitted: > A deco\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Replace bare `except:` clauses with `except Exception:` for PEP 8 compliance.",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44021",
-    "created_at": "2026-02-15T21:45:58Z",
-    "deletions": 13,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44312",
+    "created_at": "2026-02-27T01:00:33Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44021/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44021",
+    "files_url": "https://github.com/huggingface/transformers/pull/44312/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44312",
     "labels": [],
-    "merged": true,
-    "number": 44021,
+    "merged": false,
+    "number": 44312,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix false positive right-padding warning for decoder-only models in pipeline",
-    "updated_at": "2026-02-17T10:41:32Z"
+    "title": "fix: replace 8 bare except clauses with except Exception",
+    "updated_at": "2026-02-27T03:27:27Z"
   },
   {
-    "additions": 28,
-    "author": "ManasVardhan",
+    "additions": 38,
+    "author": "onel",
     "author_association": "NONE",
-    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44020",
-    "created_at": "2026-02-15T21:39:17Z",
-    "deletions": 129,
+    "body_excerpt": "# What does this PR do? Adds species bias documentation across the transformers repository to help model authors and users recognize and address potential biases in language models. The updates include guidance on documenting bias categori\u2026",
+    "changed_files": 5,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44311",
+    "created_at": "2026-02-27T00:02:49Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44020/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44020",
+    "files_url": "https://github.com/huggingface/transformers/pull/44311/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44311",
     "labels": [],
     "merged": false,
-    "number": 44020,
+    "number": 44311,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-02-17T14:15:21Z"
+    "title": "Add species bias documentation to model cards and docs",
+    "updated_at": "2026-02-27T14:09:20Z"
   },
   {
-    "additions": 17,
-    "author": "Sid-V5",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Refactored the `resnet` model to use the standardized output tracing decorators (`@capture_outputs` and `@can_return_tuple`) as part of the migration ### Changes | File | Change | |------|--------| | `modeling_resnet.py` | Migrated to `@ca\u2026",
-    "changed_files": 1,
+    "additions": 63,
+    "author": "onel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds documentation for Pruna AI integration to the Transformers ecosystem, following the existing pattern used by vLLM and Unsloth integration docs. ## Changes - Created `docs/source/en/community_integrations/pruna.\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44019",
-    "created_at": "2026-02-15T19:53:19Z",
-    "deletions": 62,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44310",
+    "created_at": "2026-02-27T00:00:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44019/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44019",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44310/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44310",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44019,
+    "number": 44310,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor `resnet` to use `@capture_outputs` / `@can_return_tuple` output tracing",
-    "updated_at": "2026-02-15T20:01:23Z"
+    "state": "closed",
+    "title": "docs: Add Pruna AI integration documentation",
+    "updated_at": "2026-02-27T14:08:21Z"
   },
   {
-    "additions": 41,
-    "author": "yashbora9",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "## Summary - Migrates `gpt_neo` to the standardized output collection interface as part of #43979 - Adds `@capture_outputs` decorator on `GPTNeoModel.forward` (base model) - Adds `@can_return_tuple` decorator on all wrapper model forwards\u2026",
-    "changed_files": 2,
+    "additions": 129,
+    "author": "onel",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds SkyPilot deployment documentation to the DeepSpeed guide. The new section includes: - Introduction to SkyPilot as a unified framework for running AI workloads across clouds and Kubernetes - Complete example YAM\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44018",
-    "created_at": "2026-02-15T19:35:06Z",
-    "deletions": 109,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44309",
+    "created_at": "2026-02-26T22:44:41Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44018/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44018",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44309/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44309",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44018,
+    "number": 44309,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor GPT-Neo output tracing to use capture_outputs/can_return_tuple",
-    "updated_at": "2026-02-16T20:33:37Z"
+    "state": "closed",
+    "title": "Add SkyPilot deployment documentation to DeepSpeed guide",
+    "updated_at": "2026-02-27T14:09:55Z"
   },
   {
-    "additions": 13,
-    "author": "nexiouscaliver",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR refactors \\`SegformersPreTrainedModel\\` and \\`SegformersForImageClassification\\` to use standardized \\`@capture_outputs\\` and \\`@can_return_tuple\\` decorators for automatic output collection. ### Changes 1. **Imported \\`@capture_ou\u2026",
-    "changed_files": 1,
+    "additions": 5854,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44017",
-    "created_at": "2026-02-15T19:27:22Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44308",
+    "created_at": "2026-02-26T21:26:05Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44017/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44017",
+    "files_url": "https://github.com/huggingface/transformers/pull/44308/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44308",
     "labels": [],
     "merged": false,
-    "number": 44017,
+    "number": 44308,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor output tracing in segformers (#43979)",
-    "updated_at": "2026-02-20T16:51:42Z"
+    "state": "closed",
+    "title": "Codex/add sam3 litetext model to transformers fuvllg",
+    "updated_at": "2026-02-26T21:35:44Z"
   },
   {
-    "additions": 95,
-    "author": "akashadsare",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "This PR migrates GPT-2 and its derivatives (GPTBigCode, Decision Transformer) to the new standardized output collection interface using the [@capture_outputs](vscode-file://vscode-app/usr/share/code/resources/app/out/vs/code/electron-brows\u2026",
-    "changed_files": 3,
+    "additions": 7,
+    "author": "imstevenpmwork",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes a `TypeError: not all arguments converted during string formatting` caused by incorrectly passing `FutureWarning` as a second argument to `logger.warning_once()` in this file, introduced in https://git\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44015",
-    "created_at": "2026-02-15T18:07:11Z",
-    "deletions": 231,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44307",
+    "created_at": "2026-02-26T20:09:20Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44015/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44015",
+    "files_url": "https://github.com/huggingface/transformers/pull/44307/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44307",
     "labels": [],
+    "merged": true,
+    "number": 44307,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(modeling_attn_mask_utils): remove FutureWarning from logger.warning_once()",
+    "updated_at": "2026-02-26T21:29:01Z"
+  },
+  {
+    "additions": 10,
+    "author": "jashshah999",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a crash in `PretrainedConfig.update_from_string()` when the input string contains entries without `=` or with multiple `=` signs. **`configuration_utils.py`** - The existing code `dict(x.split(\"=\") for x in up\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44306",
+    "created_at": "2026-02-26T20:02:10Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44306/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44306",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44015,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Refactor GPT2-based models to standardized output collection interface",
-    "updated_at": "2026-02-15T18:13:56Z"
+    "number": 44306,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix ValueError crash in PretrainedConfig.update_from_string on malformed input",
+    "updated_at": "2026-02-27T14:25:03Z"
   },
   {
-    "additions": 45,
-    "author": "weiguangli-io",
+    "additions": 7,
+    "author": "jashshah999",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #43992 by preventing a false missing-key report for `UMT5EncoderModel` when `encoder.embed_tokens.weight` is tied to `shared.weight`. `UMT5EncoderModel` already declares tied weights, but loading checkpoints that only carr\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Fixes a mutable default argument and two resource leaks: 1. **`integrations/tpu.py`** - `patched_optimizer_step` used `optimizer_args={}` as a default parameter. Mutable defaults are shared across calls, so any muta\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44014",
-    "created_at": "2026-02-15T15:17:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44305",
+    "created_at": "2026-02-26T19:22:33Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44014/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44014",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44305/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44305",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 44014,
+    "number": 44305,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[UMT5] Ignore tied encoder embedding missing-key warning",
-    "updated_at": "2026-02-16T13:40:21Z"
+    "title": "Fix mutable default in TPU optimizer and unclosed file handles",
+    "updated_at": "2026-02-27T14:21:58Z"
   },
   {
-    "additions": 10,
-    "author": "gabrielfruet",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44013",
-    "created_at": "2026-02-15T13:49:53Z",
-    "deletions": 43,
+    "additions": 151,
+    "author": "adil-a",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds NeMo Automodel under the community integrations tab. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [contribut\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44304",
+    "created_at": "2026-02-26T17:57:16Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44013/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44013",
+    "files_url": "https://github.com/huggingface/transformers/pull/44304/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44304",
     "labels": [],
-    "merged": false,
-    "number": 44013,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Ouptut tracing: Standardizing MobileNetv2",
-    "updated_at": "2026-02-15T13:50:59Z"
+    "merged": true,
+    "number": 44304,
+    "review_comments_count": 9,
+    "state": "closed",
+    "title": "docs: Add NeMo Automodel community integration docs",
+    "updated_at": "2026-03-03T16:51:48Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
-    "changed_files": 2,
+    "additions": 28,
+    "author": "michaelbenayoun",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Integrate the neuron device to TrainingArguments. It enables using the neuron device with the `Trainer` class.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44012",
-    "created_at": "2026-02-15T11:20:17Z",
-    "deletions": 159,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44302",
+    "created_at": "2026-02-26T15:11:09Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44012/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44012",
+    "files_url": "https://github.com/huggingface/transformers/pull/44302/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44302",
     "labels": [],
-    "merged": false,
-    "number": 44012,
+    "merged": true,
+    "number": 44302,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor output tracing for swinv2 model",
-    "updated_at": "2026-02-17T14:15:19Z"
+    "title": "Integrate the Neuron device to TrainingArguments",
+    "updated_at": "2026-03-05T15:11:00Z"
   },
   {
-    "additions": 79,
-    "author": "ManasVardhan",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44011",
-    "created_at": "2026-02-15T11:11:02Z",
-    "deletions": 146,
+    "additions": 30,
+    "author": "likejazz",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? When fine-tuning Qwen3 with frameworks like TRL, `<think>` blocks are silently omitted from the token sequence, causing chain-of-thought reasoning data to be completely lost during training with no error or warning\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44301",
+    "created_at": "2026-02-26T14:30:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44011/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44011",
+    "files_url": "https://github.com/huggingface/transformers/pull/44301/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44301",
     "labels": [],
     "merged": false,
-    "number": 44011,
+    "number": 44301,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
-    "updated_at": "2026-02-17T14:15:17Z"
+    "title": "Fix: Qwen3 `<think>` blocks not written during fine-tuning (TRL)",
+    "updated_at": "2026-03-02T17:18:03Z"
   },
   {
-    "additions": 41,
-    "author": "preetam1407",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "#43979. Refactors SqueezeBert to the standardized output collection interface: - Adds `_can_record_outputs` in `SqueezeBertPreTrainedModel` - Adds `@capture_outputs` on `SqueezeBertModel.forward` - Adds `@can_return_tuple` on task model fo\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44010",
-    "created_at": "2026-02-15T09:40:09Z",
-    "deletions": 139,
+    "additions": 2539,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? The recursive feature is needed for me in https://github.com/huggingface/transformers/pull/44252 to allow timm backbone define its conversion only once. Also it currently allows to delete \"t5gemma2\" from conversion,\u2026",
+    "changed_files": 18,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 44,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44300",
+    "created_at": "2026-02-26T14:09:59Z",
+    "deletions": 470,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44010/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44010",
+    "files_url": "https://github.com/huggingface/transformers/pull/44300/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44300",
     "labels": [],
-    "merged": false,
-    "number": 44010,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "[SqueezeBert] Migrate to standardized output collection decorators",
-    "updated_at": "2026-03-02T13:04:52Z"
+    "merged": true,
+    "number": 44300,
+    "review_comments_count": 45,
+    "state": "closed",
+    "title": "Dynamic weight conversion is recursive",
+    "updated_at": "2026-03-26T11:59:06Z"
   },
   {
-    "additions": 1,
-    "author": "mariam851",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43976 Updated the documentation to reflect the actual Python requirement (3.10+) as defined in setup.py. Changes: Updated README.md .",
-    "changed_files": 1,
+    "additions": 520,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, to allow for proper alignment with vllm/sglang Closes #44258",
+    "changed_files": 21,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44009",
-    "created_at": "2026-02-15T08:51:26Z",
-    "deletions": 1,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44299",
+    "created_at": "2026-02-26T12:50:03Z",
+    "deletions": 282,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44009/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44009",
+    "files_url": "https://github.com/huggingface/transformers/pull/44299/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44299",
     "labels": [],
     "merged": true,
-    "number": 44009,
-    "review_comments_count": 0,
+    "number": 44299,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "update python requirement to 3.10+ to match codebase",
-    "updated_at": "2026-02-16T13:46:56Z"
+    "title": ":rotating_light: [`Ernie 4.5 VL Moe`] Fix up namings to vllm/sglang convention",
+    "updated_at": "2026-02-26T16:42:50Z"
   },
   {
-    "additions": 26,
-    "author": "pdwi2020",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## Summary - refactor `ResNetModel` to use `@capture_outputs` for hidden-state collection - register `_can_record_outputs` on `ResNetPreTrainedModel` with `ResNetStage` - switch `ResNetForImageClassification` and `ResNetBackbone` to `@can_\u2026",
-    "changed_files": 3,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
+    "additions": 1145,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? A few issues we did not catch: - https://github.com/huggingface/transformers/blob/47b0e478f324b54f177ea7998a0791870fdd0324/src/transformers/convert_slow_tokenizer.py#L1314-L1315 missing from `GemmaTokenier` - SPM's\u2026",
+    "changed_files": 7,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44007",
-    "created_at": "2026-02-15T07:26:52Z",
-    "deletions": 58,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44007/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44007",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44298",
+    "created_at": "2026-02-26T12:34:38Z",
+    "deletions": 29,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44298/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44298",
     "labels": [],
     "merged": false,
-    "number": 44007,
-    "review_comments_count": 0,
+    "number": 44298,
+    "review_comments_count": 1,
     "state": "open",
-    "title": "[ResNet] Refactor output tracing to decorator-based interface",
-    "updated_at": "2026-02-19T15:49:49Z"
+    "title": "Auto detect wrong mapping models",
+    "updated_at": "2026-03-02T10:13:28Z"
   },
   {
-    "additions": 8,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR uses torch.xlogy for better numerical handling.",
-    "changed_files": 8,
+    "additions": 12302,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Needed for https://github.com/huggingface/transformers/pull/41250 to pass the docstring-checker in CI. Our basic checker doesn't handle well dataclasses so we can use `autodocstring`",
+    "changed_files": 512,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44006",
-    "created_at": "2026-02-15T04:07:50Z",
-    "deletions": 8,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44296",
+    "created_at": "2026-02-26T10:29:21Z",
+    "deletions": 37860,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44006/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44006",
+    "files_url": "https://github.com/huggingface/transformers/pull/44296/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44296",
     "labels": [],
     "merged": true,
-    "number": 44006,
-    "review_comments_count": 0,
+    "number": 44296,
+    "review_comments_count": 11,
     "state": "closed",
-    "title": "Use torch.xlogy ",
-    "updated_at": "2026-02-17T00:42:54Z"
+    "title": "Add auto-docstring on configs",
+    "updated_at": "2026-03-06T11:58:10Z"
   },
   {
-    "additions": 224,
-    "author": "cyyever",
+    "additions": 1,
+    "author": "mario-sanz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR transfers grid_thw to a python list at the beginning of some functions to reduce later CUDA sync calls. Therefore, several sync calls are merged into one call.",
-    "changed_files": 16,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44005",
-    "created_at": "2026-02-15T02:34:55Z",
-    "deletions": 254,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44294",
+    "created_at": "2026-02-26T08:30:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44005/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44005",
+    "files_url": "https://github.com/huggingface/transformers/pull/44294/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44294",
     "labels": [],
     "merged": true,
-    "number": 44005,
-    "review_comments_count": 1,
+    "number": 44294,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Reduce reduce CUDA sync",
-    "updated_at": "2026-02-17T01:00:52Z"
+    "title": "Fix: use `TokenizersBackend` for Olmo3 to preserve custom `pre_tokenizer`",
+    "updated_at": "2026-02-26T10:35:44Z"
   },
   {
-    "additions": 21,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `codegen` model as per #43979 cc @molbap <img width=\"843\" height=\"445\" alt=\"Screenshot 2026-02-15 at 5 24 52 AM\" src=\"https://github.com/user-attachments/assets/d5aeb711-96a7-4fd8-af7b-0aeac23eeeb1\" /> 2 tests are bei\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44004",
-    "created_at": "2026-02-14T23:56:18Z",
-    "deletions": 62,
+    "additions": 13,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44293",
+    "created_at": "2026-02-26T08:25:23Z",
+    "deletions": 384,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44004/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44004",
+    "files_url": "https://github.com/huggingface/transformers/pull/44293/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44293",
     "labels": [],
-    "merged": false,
-    "number": 44004,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing for `codegen`",
-    "updated_at": "2026-02-17T08:56:07Z"
+    "merged": true,
+    "number": 44293,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Strict export cleanup",
+    "updated_at": "2026-03-02T09:36:19Z"
   },
   {
-    "additions": 37,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `mamba` model as per #43979 cc @molbap <img width=\"859\" height=\"427\" alt=\"Screenshot 2026-02-15 at 5 12 43 AM\" src=\"https://github.com/user-attachments/assets/f23bb675-a9a3-4e21-a6c5-9804910301b4\" /> Note - Only 46 te\u2026",
-    "changed_files": 2,
+    "additions": 548,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "- adds `trainer_recipes.md` to show how to use other practical `Trainer` features outside of the basic training loop - updates hyperparam search docs - updates `optimizers.md` with how to customize it (prebuilt instances, passing a class +\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44003",
-    "created_at": "2026-02-14T23:46:10Z",
-    "deletions": 68,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44290",
+    "created_at": "2026-02-26T01:02:15Z",
+    "deletions": 210,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44003/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44003",
+    "files_url": "https://github.com/huggingface/transformers/pull/44290/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44290",
     "labels": [],
     "merged": false,
-    "number": 44003,
-    "review_comments_count": 0,
+    "number": 44290,
+    "review_comments_count": 17,
     "state": "open",
-    "title": "refactor output tracing in `mamba`",
-    "updated_at": "2026-02-17T07:40:50Z"
+    "title": "[docs] optimizers, hyperparam search, training features",
+    "updated_at": "2026-04-02T17:47:57Z"
   },
   {
-    "additions": 7,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `upernet` model as per #43979 cc @molbap <img width=\"856\" height=\"457\" alt=\"Screenshot 2026-02-15 at 4 51 03 AM\" src=\"https://github.com/user-attachments/assets/5dc478d7-d708-4296-a86b-c3bb252d0325\" />",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
+    "additions": 8,
+    "author": "haosenwang1018",
+    "author_association": "NONE",
+    "body_excerpt": "Replace bare except clauses with except Exception.",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44002",
-    "created_at": "2026-02-14T23:21:45Z",
-    "deletions": 20,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44002/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44002",
-    "labels": [],
-    "merged": false,
-    "number": 44002,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `upernet`",
-    "updated_at": "2026-02-17T08:55:16Z"
-  },
-  {
-    "additions": 3,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the`univnet` model as per #43979 cc @molbap <img width=\"848\" height=\"462\" alt=\"Screenshot 2026-02-15 at 4 19 00 AM\" src=\"https://github.com/user-attachments/assets/75848429-b9ff-49b3-a028-645aa67fc2ad\" />",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44001",
-    "created_at": "2026-02-14T22:50:39Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44289",
+    "created_at": "2026-02-26T00:58:35Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44001/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44001",
+    "files_url": "https://github.com/huggingface/transformers/pull/44289/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44289",
     "labels": [],
     "merged": false,
-    "number": 44001,
+    "number": 44289,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `univnet`",
-    "updated_at": "2026-02-14T23:22:13Z"
+    "state": "closed",
+    "title": "fix: replace 8 bare except clauses with except Exception",
+    "updated_at": "2026-02-26T12:52:48Z"
   },
   {
-    "additions": 8,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `vision_text_dual_encoder` model issue as per #43979 cc @molbap <img width=\"876\" height=\"292\" alt=\"Screenshot 2026-02-15 at 4 09 07 AM\" src=\"https://github.com/user-attachments/assets/11147a56-993b-4abc-b07a-ec739a53d\u2026",
+    "additions": 1,
+    "author": "somAzzz",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? When loading the new Qwen 3.5 models (e.g., 'Qwen/Qwen3.5-35B-A3B') using the 'transformers' (5.3.0.dev0), the initialization crashes with a 'TypeError' . **Error Traceback Context:** (APIServer pid=98544) File \"...\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/44000",
-    "created_at": "2026-02-14T22:44:14Z",
-    "deletions": 21,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/44000/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/44000",
-    "labels": [],
-    "merged": false,
-    "number": 44000,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `vision_text_dual_encoder`",
-    "updated_at": "2026-02-17T07:46:33Z"
-  },
-  {
-    "additions": 10,
-    "author": "omkar-334",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `mobilenet_v1` model as per #43979 cc @molbap",
-    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43999",
-    "created_at": "2026-02-14T22:20:19Z",
-    "deletions": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44288",
+    "created_at": "2026-02-25T22:43:41Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43999/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43999",
+    "files_url": "https://github.com/huggingface/transformers/pull/44288/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44288",
     "labels": [],
     "merged": false,
-    "number": 43999,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `mobilenet_v1`",
-    "updated_at": "2026-02-17T07:52:08Z"
+    "number": 44288,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Fix TypeError in modeling_rope_utils.py when ignore_keys_at_rope_vali\u2026",
+    "updated_at": "2026-02-26T20:09:07Z"
   },
   {
-    "additions": 8,
-    "author": "omkar-334",
+    "additions": 11,
+    "author": "jashshah999",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refactors the `timm_backbone` model issue as per #43979 cc @molbap <img width=\"856\" height=\"423\" alt=\"Screenshot 2026-02-15 at 4 10 15 AM\" src=\"https://github.com/user-attachments/assets/26237c3e-7b66-4f0d-a8b5-ffad6ee7c673\" />",
-    "changed_files": 1,
-    "cluster_id": "cluster-43998-11",
-    "cluster_ids": [
-      "cluster-43998-11"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43998",
-    "created_at": "2026-02-14T22:12:30Z",
-    "deletions": 19,
+    "body_excerpt": "# What does this PR do? Fixes mutable default arguments and unclosed file handles across several files. **Mutable defaults** (can cause shared state across calls): - `debug_utils.py`: `DebugUnderflowOverflow.__init__` `trace_batch_nums=[]`\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44287",
+    "created_at": "2026-02-25T22:23:20Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43998/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43998",
+    "files_url": "https://github.com/huggingface/transformers/pull/44287/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44287",
     "labels": [],
-    "merged": false,
-    "number": 43998,
+    "merged": true,
+    "number": 44287,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "refactor output tracing in `timm_backbone`",
-    "updated_at": "2026-02-21T07:29:47Z"
+    "state": "closed",
+    "title": "Fix mutable default arguments and resource leaks",
+    "updated_at": "2026-03-02T15:17:25Z"
   },
   {
-    "additions": 12,
-    "author": "karthiksuki",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? This PR migrates the **RegNet** model to the standardized output collection interface as part of the ongoing refactoring effort in issue #43979. Specifically: - Adds the `_can_record_outputs` dictionary to `RegNetPr\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43997",
-    "created_at": "2026-02-14T19:57:54Z",
-    "deletions": 45,
+    "additions": 31,
+    "author": "kathrynle20",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds on to the Fouroversix integration by adding support for quantized models such as the gpt-oss model by adding weight conversions and an additional config argument. Reference: https://github.com/huggingfa\u2026",
+    "changed_files": 3,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44286",
+    "created_at": "2026-02-25T22:15:15Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43997/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43997",
+    "files_url": "https://github.com/huggingface/transformers/pull/44286/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44286",
     "labels": [],
-    "merged": false,
-    "number": 43997,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Migrate RegNet to standardized output tracing",
-    "updated_at": "2026-02-14T20:10:22Z"
+    "merged": true,
+    "number": 44286,
+    "review_comments_count": 12,
+    "state": "closed",
+    "title": "Add future model support for Fouroversix",
+    "updated_at": "2026-03-04T16:28:13Z"
   },
   {
-    "additions": 44,
-    "author": "beelapranay",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors FNet and CVT output tracing to use the standardized decorators where appropriate. 1. FNet now uses @capture_outputs with _can_record_outputs to collect hidden states. 2. CVT keeps manual hidden-state colle\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
-    ],
-    "cluster_role": "member",
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43996",
-    "created_at": "2026-02-14T17:55:31Z",
-    "deletions": 134,
+    "additions": 3484,
+    "author": "NielsRogge",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR adds the VidEoMT model, as described in [VidEoMT: Your ViT is Secretly Also a Video Segmentation Model](https://huggingface.co/papers/2602.17807). Gradio demo (running on ZeroGPU): https://huggingface.co/spa\u2026",
+    "changed_files": 17,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44285",
+    "created_at": "2026-02-25T19:24:39Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43996/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43996",
-    "labels": [],
-    "merged": false,
-    "number": 43996,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactor FNet and CVT output tracing",
-    "updated_at": "2026-02-14T18:10:17Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44285/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44285",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 44285,
+    "review_comments_count": 57,
+    "state": "closed",
+    "title": "Add VidEoMT",
+    "updated_at": "2026-03-25T17:05:47Z"
   },
   {
-    "additions": 21,
-    "author": "akeemlh",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors falcon in partial fulfillment of https://github.com/huggingface/transformers/issues/43979 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
-    "changed_files": 1,
+    "additions": 388,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #44242 where Mixtral models do not compute auxiliary load balancing loss when `output_router_logits=False`, even when `router_aux_loss_coef > 0`. ## Problem According to the [Mixtral documentation](https://huggingface.\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43995",
-    "created_at": "2026-02-14T14:39:58Z",
-    "deletions": 87,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44284",
+    "created_at": "2026-02-25T18:38:15Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43995/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43995",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44284/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44284",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43995,
+    "number": 44284,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Refactoring falcon model to match standardized output collection interface",
-    "updated_at": "2026-02-14T14:41:00Z"
+    "state": "closed",
+    "title": "Fix Mixtral auxiliary loss computation when output_router_logits=False",
+    "updated_at": "2026-02-26T12:41:46Z"
   },
   {
-    "additions": 12,
-    "author": "saurav0369",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### Docs: Fix Typos and Standardize Naming This PR fixes various typos, duplicate words, and capitalization inconsistencies across the documentation to improve readability and ensure professional branding. | File | Changes Made | | :--- |\u2026",
-    "changed_files": 7,
+    "additions": 1,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "Accidentally caused by #43325, wrong naming --> modular doesn't properly convert some files (e.g. kyutai) Also fixes red CI on main",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43993",
-    "created_at": "2026-02-14T10:11:40Z",
-    "deletions": 12,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44283",
+    "created_at": "2026-02-25T18:33:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43993/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43993",
+    "files_url": "https://github.com/huggingface/transformers/pull/44283/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44283",
     "labels": [],
     "merged": true,
-    "number": 43993,
+    "number": 44283,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: fix typos across documentation files",
-    "updated_at": "2026-02-16T13:41:41Z"
+    "title": "[`Modular`] Fix file type regression",
+    "updated_at": "2026-02-25T20:04:41Z"
   },
   {
-    "additions": 3,
-    "author": "taovinci0",
-    "author_association": "NONE",
-    "body_excerpt": "Replaces mutable default dict `weights={}` with `weights=None` and initializes inside the function. The dict is mutated via `weights[full_key] = w`, which can cause unexpected behavior across multiple calls.",
+    "additions": 5,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Response schema save-loading was broken in #40936, this PR restores it! I did most of this in #42300 but missed an issue with loading/saving.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43991",
-    "created_at": "2026-02-14T00:00:00Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44282",
+    "created_at": "2026-02-25T17:57:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43991/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43991",
+    "files_url": "https://github.com/huggingface/transformers/pull/44282/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44282",
     "labels": [],
-    "merged": false,
-    "number": 43991,
+    "merged": true,
+    "number": 44282,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: replace mutable default argument in _read_h5_weights",
-    "updated_at": "2026-02-16T11:18:06Z"
+    "title": "Restore response_schema saving-loading",
+    "updated_at": "2026-02-25T18:27:22Z"
   },
   {
-    "additions": 10,
-    "author": "Abhijeetsingh610",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a crash in `AutoVideoProcessor` when `torchvision` is unavailable. `VIDEO_PROCESSOR_MAPPING_NAMES` can contain `None`, and `video_processor_class_from_name` was doing `if class_name in extractors`, which rais\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Its a very small fix for #44062",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43989",
-    "created_at": "2026-02-13T20:48:03Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44281",
+    "created_at": "2026-02-25T16:28:37Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43989/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43989",
+    "files_url": "https://github.com/huggingface/transformers/pull/44281/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44281",
     "labels": [],
-    "merged": false,
-    "number": 43989,
+    "merged": true,
+    "number": 44281,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix AutoVideoProcessor class lookup when torchvision is unavailable",
-    "updated_at": "2026-02-18T17:52:34Z"
+    "state": "closed",
+    "title": "Fix special token maps BC",
+    "updated_at": "2026-02-26T10:34:17Z"
   },
   {
-    "additions": 7,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **LayoutXLM:** [This PR (rm slow tokenizers)](https://github.com/huggingface/transformers/pull/40936) changed [models/auto/tokenization_auto.py](\u2026",
-    "changed_files": 2,
+    "additions": 614,
+    "author": "RishabhMehra",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? - Adds an opt-in use_fast_grouping flag to TokenClassificationPipeline to enable a NumPy-vectorised BIO grouping path (~5\u00d7 faster on long sequences) while keeping the legacy path as default. - Improves correctness:\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43988",
-    "created_at": "2026-02-13T20:03:28Z",
-    "deletions": 9,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44278",
+    "created_at": "2026-02-25T12:49:56Z",
+    "deletions": 63,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43988/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43988",
-    "labels": [],
-    "merged": true,
-    "number": 43988,
+    "files_url": "https://github.com/huggingface/transformers/pull/44278/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44278",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44278,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Fix LayoutXLM tokenization test and LightOnOCR SDPA flash test failures on main CI",
-    "updated_at": "2026-02-23T14:07:59Z"
+    "title": "[FEAT] Pipelines - Faster group_entities",
+    "updated_at": "2026-02-25T13:54:58Z"
   },
   {
-    "additions": 47,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? Accelerator has a lot of other args that can be passed to it like fp8 support, etc, but requires extensive monkey patching downstream to make it work. This makes it easier to extend the accelerator args building met\u2026",
-    "changed_files": 1,
+    "additions": 105,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This patch makes the GLM-ASR doc example runnable by using `runnables` - see https://github.com/huggingface/doc-builder/blob/main/docs/runnable-code-blocks.md",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43987",
-    "created_at": "2026-02-13T18:51:56Z",
-    "deletions": 38,
+    "comments_count": 36,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44277",
+    "created_at": "2026-02-25T08:49:20Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43987/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43987",
+    "files_url": "https://github.com/huggingface/transformers/pull/44277/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44277",
     "labels": [],
     "merged": true,
-    "number": 43987,
-    "review_comments_count": 2,
+    "number": 44277,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "split out accelerator args builder method",
-    "updated_at": "2026-02-16T14:59:03Z"
+    "title": "Use doc-builder runnable example for GLM-ASR",
+    "updated_at": "2026-04-02T16:16:55Z"
   },
   {
-    "additions": 1828,
-    "author": "winglian",
-    "author_association": "COLLABORATOR",
-    "body_excerpt": "# What does this PR do? The `_inner_training_loop` method has a lot going on which makes it hard to extend for downstream developers/libraries. This PR breaks it up into smaller well described methods that are chained in the training loop.\u2026",
-    "changed_files": 5,
+    "additions": 0,
+    "author": "vishalpatil-45",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR addresses the performance regression where `import transformers` takes ~3.5s. The issue was caused by eager imports of heavy backend libraries (like torch/numpy) during the initial module load. By moving the\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43985",
-    "created_at": "2026-02-13T17:55:01Z",
-    "deletions": 251,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44275",
+    "created_at": "2026-02-25T08:27:32Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43985/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43985",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44275/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44275",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43985,
+    "number": 44275,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor inner training loop",
-    "updated_at": "2026-03-09T19:57:50Z"
+    "title": "[Fix] Restore lazy loading to improve import performance (#44273)",
+    "updated_at": "2026-02-25T20:37:18Z"
   },
   {
-    "additions": 2,
-    "author": "materight",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Removes unused `.squeeze` from VJEPA2 embeddings rotation. Currently the squeeze does nothing on video input since torch skips it if the dimension is not 1. Exporting to onnx and compiling to TensorRT instead fails\u2026",
-    "changed_files": 1,
+    "additions": 559,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## Description Fixes #44242 This PR resolves an issue where the auxiliary load balancing loss was not computed when `output_router_logits=False`, even when `router_aux_loss_coef != 0`. ## Problem The auxiliary loss computation was incorrec\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43984",
-    "created_at": "2026-02-13T17:53:16Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44274",
+    "created_at": "2026-02-25T06:38:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43984/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43984",
-    "labels": [],
-    "merged": true,
-    "number": 43984,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Remove unused squeeze from VJEPA2 embeddings rotation",
-    "updated_at": "2026-02-13T21:56:01Z"
-  },
-  {
-    "additions": 62,
-    "author": "Aki-07",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43979-28",
-    "cluster_ids": [
-      "cluster-43979-28"
+    "files_url": "https://github.com/huggingface/transformers/pull/44274/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44274",
+    "labels": [
+      "Code agent slop"
     ],
-    "cluster_role": "canonical",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43983",
-    "created_at": "2026-02-13T17:52:45Z",
-    "deletions": 188,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43983/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43983",
-    "labels": [],
-    "merged": true,
-    "number": 43983,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44274,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Migrate GPT2 to standardized output capture decorators",
-    "updated_at": "2026-02-18T10:40:51Z"
+    "title": "Fix auxiliary load balancing loss computation when output_router_logits=False",
+    "updated_at": "2026-02-25T13:36:03Z"
   },
   {
     "additions": 1,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR create a `.git-blame-ignore-revs` file to ignore the following commit https://github.com/huggingface/transformers/pull/43914 when using git blame.",
+    "author": "hangjun-ezra",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `TypeError: unsupported operand type(s) for |: 'list' and 'set'` in `RotaryEmbeddingConfigMixin.convert_rope_params_to_dict` when `ignore_keys_at_rope_validation` is a `list` instead of a `set`. ### Root ca\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43982",
-    "created_at": "2026-02-13T17:13:41Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44272",
+    "created_at": "2026-02-25T03:52:04Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43982/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43982",
+    "files_url": "https://github.com/huggingface/transformers/pull/44272/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44272",
     "labels": [],
     "merged": true,
-    "number": 43982,
+    "number": 44272,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "create .git-blame-ignore-revs file ",
-    "updated_at": "2026-02-16T13:08:22Z"
+    "title": "Fix TypeError in convert_rope_params_to_dict when ignore_keys is a list",
+    "updated_at": "2026-02-25T14:38:36Z"
   },
   {
-    "additions": 5,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Some of our image processors have a fast return for images that are already square. However, this fast return skips the `background_color` check, which causes flaky test failures because the `test_padding` test uses `self.assertRaises()` t\u2026",
-    "changed_files": 1,
+    "additions": 1272,
+    "author": "balak4",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - Add GreedyLR, a metric-based adaptive learning rate scheduler that adjusts the learning rate during training based on the current loss - Based on [\"Dynamic Learning Rate Scheduling based on Loss Changes Leads to Faster Converg\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43981",
-    "created_at": "2026-02-13T17:01:51Z",
-    "deletions": 0,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44271",
+    "created_at": "2026-02-25T01:40:57Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43981/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43981",
+    "files_url": "https://github.com/huggingface/transformers/pull/44271/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44271",
     "labels": [],
     "merged": true,
-    "number": 43981,
+    "number": 44271,
     "review_comments_count": 3,
     "state": "closed",
-    "title": "Fix early image processor return not raising error",
-    "updated_at": "2026-02-16T16:40:41Z"
+    "title": "Add GreedyLR adaptive learning rate scheduler",
+    "updated_at": "2026-03-18T18:45:46Z"
   },
   {
-    "additions": 3,
-    "author": "albertvillanova",
+    "additions": 88,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Escape `%` in `help` for `ArgumentParser.add_argument` to fix TypeError: > TypeError: not enough arguments for format string Context: https://docs.python.org/3/library/argparse.html#help > As the help string support\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? A lot of ProcessorsKwargs have incorrect/unspecified type hints in their ProcessorsKwargs TypedDict for their images_kwargs attribute. Functionnaly, this did not cause issues as \"_merge_kwargs\" automatically picks u\u2026",
+    "changed_files": 44,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43980",
-    "created_at": "2026-02-13T15:43:52Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44270",
+    "created_at": "2026-02-25T00:11:31Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43980/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43980",
+    "files_url": "https://github.com/huggingface/transformers/pull/44270/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44270",
     "labels": [],
     "merged": false,
-    "number": 43980,
+    "number": 44270,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Escape % in help for ArgumentParser.add_argument to fix TypeError",
-    "updated_at": "2026-02-17T17:30:07Z"
+    "state": "open",
+    "title": "Add correct typing to custom images_kwargs in ProcessorsKwargs",
+    "updated_at": "2026-02-25T01:12:06Z"
   },
   {
-    "additions": 0,
-    "author": "NicoSimo",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Minor fix, resolves some older references to Python3.9. Fixes #43976 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the\u2026",
-    "changed_files": 0,
+    "additions": 30,
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This is a follow-up to https://github.com/huggingface/transformers/pull/43748, and will allow to have clickable links to the full modality kwargs when present in the docstring of a processor or image processor Cc @s\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43978",
-    "created_at": "2026-02-13T14:40:54Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44269",
+    "created_at": "2026-02-25T00:05:47Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43978/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43978",
+    "files_url": "https://github.com/huggingface/transformers/pull/44269/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44269",
     "labels": [],
-    "merged": false,
-    "number": 43978,
+    "merged": true,
+    "number": 44269,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update references to Python3.9 to Python3.10. Resolves #43976",
-    "updated_at": "2026-02-13T17:00:07Z"
+    "title": "Add `ProcessingKwargs` `ImagesKwargs` etc. to docs",
+    "updated_at": "2026-02-27T19:03:15Z"
   },
   {
-    "additions": 48,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 5,
+    "additions": 5,
+    "author": "ethanknights",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Some improvements to the `trainer.py` docs. ## Before submitting - [x] This PR fixes a typo or improves the docs. ## Who can review? Documentation: @stevhliu",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43977",
-    "created_at": "2026-02-13T13:18:49Z",
-    "deletions": 23,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44268",
+    "created_at": "2026-02-24T23:20:16Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43977/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43977",
+    "files_url": "https://github.com/huggingface/transformers/pull/44268/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44268",
     "labels": [],
     "merged": true,
-    "number": 43977,
-    "review_comments_count": 2,
+    "number": 44268,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "stable grouped_mm API",
-    "updated_at": "2026-02-16T11:09:33Z"
+    "title": "chore: fixes in `Trainer` class docs (`compute_loss` & `hyperparameter_search`)",
+    "updated_at": "2026-02-26T00:50:23Z"
   },
   {
-    "additions": 1659,
-    "author": "Abubakar-rashid",
+    "additions": 4,
+    "author": "manavshrivastavagit",
     "author_association": "NONE",
-    "body_excerpt": "This fixes issue #43957 reported by @xvdp, where models fail to load when using [torch.device('meta')](vscode-file://vscode-app/c:/Users/Priva/AppData/Local/Programs/Microsoft%20VS%20Code/_/resources/app/out/vs/code/electron-browser/workbe\u2026",
-    "changed_files": 28,
+    "body_excerpt": "## Summary - Update the `DocumentQuestionAnsweringPipeline` docstring to explicitly mention the task summary in the Transformers documentation. - Remove the stale TODO comment now that document question answering is covered in the task sum\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43974",
-    "created_at": "2026-02-13T11:27:42Z",
-    "deletions": 381,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44267",
+    "created_at": "2026-02-24T20:35:18Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43974/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43974",
+    "files_url": "https://github.com/huggingface/transformers/pull/44267/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44267",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43974,
+    "number": 44267,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix: Replace torch.linspace().item() with python_linspace() to resolv\u2026",
-    "updated_at": "2026-02-16T13:46:49Z"
+    "title": "Docs: point DocumentQuestionAnswering pipeline to task summary",
+    "updated_at": "2026-02-25T13:34:48Z"
   },
   {
-    "additions": 3909,
-    "author": "MHRDYN7",
+    "additions": 27,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Add support for lfm 2/2.5 audio models. (closes #43909)",
-    "changed_files": 16,
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 **Reasoning:** The impact of this fix goes beyond `Mask2Former` and `DeformableDetr` and should fix any model that uses `torch_compilable_check`. Most use\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43973",
-    "created_at": "2026-02-13T09:36:59Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44266",
+    "created_at": "2026-02-24T20:02:06Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44266/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44266",
+    "labels": [],
+    "merged": true,
+    "number": 44266,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix(utils): Make torch_compilable_check compatible with torch.export strict mode",
+    "updated_at": "2026-02-26T09:42:47Z"
+  },
+  {
+    "additions": 90,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, WIP --> needs a test",
+    "changed_files": 36,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44264",
+    "created_at": "2026-02-24T18:06:58Z",
+    "deletions": 210,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43973/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43973",
+    "files_url": "https://github.com/huggingface/transformers/pull/44264/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44264",
     "labels": [],
     "merged": false,
-    "number": 43973,
-    "review_comments_count": 0,
+    "number": 44264,
+    "review_comments_count": 3,
     "state": "open",
-    "title": "Add lfm2.5 audio",
-    "updated_at": "2026-02-21T16:42:21Z"
+    "title": "[`Moe`] Enable aux loss automatically when in training + coef is not 0",
+    "updated_at": "2026-02-25T18:53:20Z"
   },
   {
-    "additions": 2219,
-    "author": "zucchini-nlp",
+    "additions": 5882,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Following Ernie, we build 3d positions based on `mm_token_type_ids` and the models will return them by default from `processor`. We have a unified `get_vision_position` in the qwen2-vl model file, all other models j\u2026",
-    "changed_files": 45,
+    "body_excerpt": "# What does this PR do? This PR refactor the common tests that we have in Trainer. I've mainly did the following: - Split the tests that we have in `test_trainer.py` into multiple files. - Fix common tests that were failing in the CI",
+    "changed_files": 18,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 30,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43972",
-    "created_at": "2026-02-13T09:31:44Z",
-    "deletions": 1611,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44260",
+    "created_at": "2026-02-24T15:51:11Z",
+    "deletions": 6147,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43972/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43972",
+    "files_url": "https://github.com/huggingface/transformers/pull/44260/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44260",
     "labels": [],
     "merged": true,
-    "number": 43972,
-    "review_comments_count": 17,
+    "number": 44260,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": ":rotating_light: Unify 3D position ids",
-    "updated_at": "2026-03-05T18:48:30Z"
+    "title": "Update common tests Trainer",
+    "updated_at": "2026-02-27T17:31:59Z"
   },
   {
-    "additions": 65,
-    "author": "caffeinism",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? 1. According to the paper, this model is designed to reference 250 contexts (10 seconds), but the current implementation uses DynamicCache without employing create_sliding_window_causal_mask, causing it to reference\u2026",
+    "additions": 1830,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? This PR supersedes #43985 to replace the dataset/sampler/dataloader with a data producer that should allow us to more easily get to the next step of async training for RL. <!-- Congratulations! You've made it this f\u2026",
+    "changed_files": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44259",
+    "created_at": "2026-02-24T15:01:56Z",
+    "deletions": 59,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44259/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44259",
+    "labels": [],
+    "merged": false,
+    "number": 44259,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Async data producer",
+    "updated_at": "2026-02-26T19:57:43Z"
+  },
+  {
+    "additions": 8,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "The old one has a merge conflict and it was easier to just mirror into a new branch / PR Note that this only affects the local big tests that I used to run on the A100s locally; not to be run with the CI (too big)",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43971",
-    "created_at": "2026-02-13T09:28:32Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44258",
+    "created_at": "2026-02-24T15:00:29Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43971/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43971",
-    "labels": [
-      "Audio"
-    ],
-    "merged": true,
-    "number": 43971,
-    "review_comments_count": 2,
+    "files_url": "https://github.com/huggingface/transformers/pull/44258/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44258",
+    "labels": [],
+    "merged": false,
+    "number": 44258,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Mimi] Calibrate to ensure encoder streaming performs correctly",
-    "updated_at": "2026-02-23T14:20:01Z"
+    "title": "[`Ernie 4.5 VL Moe`] Change revision",
+    "updated_at": "2026-03-14T19:59:05Z"
   },
   {
-    "additions": 542,
-    "author": "jackcook",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for quantization with [Four Over Six (4/6)](https://github.com/mit-han-lab/fouroversix). Our library currently focuses on quantizing linear layers to NVFP4, including weight, activation, and gra\u2026",
-    "changed_files": 15,
+    "additions": 3,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? When post training using context parallelism, some processes may have their chunk of the sample input masked out leading to a NaN loss for that process. Using `nanmean` allows us to keep the real loss that isn't `Na\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43970",
-    "created_at": "2026-02-13T05:15:44Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44257",
+    "created_at": "2026-02-24T14:56:42Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43970/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43970",
+    "files_url": "https://github.com/huggingface/transformers/pull/44257/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44257",
     "labels": [],
-    "merged": true,
-    "number": 43970,
-    "review_comments_count": 28,
-    "state": "closed",
-    "title": "Add Four Over Six quantization integration",
-    "updated_at": "2026-02-25T09:30:09Z"
+    "merged": false,
+    "number": 44257,
+    "review_comments_count": 5,
+    "state": "open",
+    "title": "use nanmean for aggregating loss",
+    "updated_at": "2026-02-25T17:01:08Z"
   },
   {
-    "additions": 6,
-    "author": "jp1924",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 10,
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fix CLI NameError: name 'TypeAdapter' is not defined: - Do not evaluate type annotations in CLI serve ### Problem Calling the CLI raises NameError: > NameError: name 'TypeAdapter' is not defined ```bash transformers --help ``` ```python Tr\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43969",
-    "created_at": "2026-02-13T03:47:24Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44256",
+    "created_at": "2026-02-24T14:54:49Z",
     "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43969/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43969",
+    "files_url": "https://github.com/huggingface/transformers/pull/44256/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44256",
     "labels": [],
-    "merged": false,
-    "number": 43969,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 44256,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix for 2D weight error in embedding layer with ZeRO3",
-    "updated_at": "2026-02-20T07:32:14Z"
+    "title": "Fix CLI NameError: name 'TypeAdapter' is not defined",
+    "updated_at": "2026-03-12T07:57:14Z"
   },
   {
-    "additions": 90,
-    "author": "qgallouedec",
+    "additions": 404,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Enhances `get_json_schema()` and `render_jinja_template()` to support instance methods, class methods, and static methods, not just plain functions. Previously, `get_json_schema()` only worked with standalone functi\u2026",
-    "changed_files": 3,
+    "body_excerpt": "## What this PR does Given he different issues that were noticed by @hmellor on vLLM, we wanted to make sure we did not end up with crazy breaks. We ran a full test suite (code can be found in #44298) and the results showed 22 model conver\u2026",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43968",
-    "created_at": "2026-02-13T01:43:51Z",
-    "deletions": 3,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44255",
+    "created_at": "2026-02-24T14:17:00Z",
+    "deletions": 205,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43968/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43968",
+    "files_url": "https://github.com/huggingface/transformers/pull/44255/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44255",
     "labels": [],
     "merged": true,
-    "number": 43968,
-    "review_comments_count": 2,
+    "number": 44255,
+    "review_comments_count": 25,
     "state": "closed",
-    "title": "Enhance JSON schema generation to support instance, static, and class methods",
-    "updated_at": "2026-02-13T18:01:56Z"
+    "title": "[vllm + v5 fix] handle TokenizersBackend fallback properly for v5",
+    "updated_at": "2026-03-23T11:07:37Z"
   },
-  {
-    "additions": 3,
-    "author": "shtse8",
+  {
+    "additions": 16,
+    "author": "mario-sanz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes the `AttributeError: 'List' object has no attribute 'dtype'` crash in `run_classification.py` when loading JSON data with list-type labels for multi-label classification (reported in #43116). ### Problem When\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully refle\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43967",
-    "created_at": "2026-02-12T23:42:11Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44254",
+    "created_at": "2026-02-24T13:54:30Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43967/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43967",
+    "files_url": "https://github.com/huggingface/transformers/pull/44254/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44254",
     "labels": [],
     "merged": false,
-    "number": 43967,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix AttributeError in run_classification.py when detecting multi-label data",
-    "updated_at": "2026-02-12T23:42:11Z"
+    "number": 44254,
+    "review_comments_count": 5,
+    "state": "closed",
+    "title": "Fix fast tokenizers overwriting custom `pre_tokenizer` from `tokenizer.json`",
+    "updated_at": "2026-02-26T08:45:56Z"
   },
   {
-    "additions": 10,
-    "author": "shtse8",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43867 When a model has sub-models with different naming conventions (e.g. `model.layers.26.self_attn.o_proj.weight` vs `desc_model.roberta.encoder.layers.7.norm1.weight`), `dot_natural_key` can produce lists\u2026",
+    "additions": 9,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "`create_import_structure_from_path` does some redundant `os` calls, so I'm experimenting with changes to see if we can speed up loading a lot. Related to #44246",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43966",
-    "created_at": "2026-02-12T23:40:45Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44253",
+    "created_at": "2026-02-24T13:03:40Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43966/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43966",
+    "files_url": "https://github.com/huggingface/transformers/pull/44253/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44253",
     "labels": [],
     "merged": true,
-    "number": 43966,
-    "review_comments_count": 2,
+    "number": 44253,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in dot_natural_key when state_dict keys have mixed types at same position",
-    "updated_at": "2026-02-13T17:39:52Z"
+    "title": "Speed create_import_structure up with os.scandir()",
+    "updated_at": "2026-03-10T12:49:42Z"
   },
   {
-    "additions": 77,
-    "author": "stevhliu",
+    "additions": 718,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "updates `tokenizer_summary.md`: - condense intro and subword tokenization sections since this doc is mostly about BPE/Unigram/WordPiece - removes some redundant and introductory motivation content and links to the course for more info - pl\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Deprecate timm backbone in favor of keeping all models within one `timm` folder, similar to other vision models. A backbone is just a variation of `PreTrainedModel`",
+    "changed_files": 61,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43965",
-    "created_at": "2026-02-12T22:08:33Z",
-    "deletions": 200,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44252",
+    "created_at": "2026-02-24T13:00:59Z",
+    "deletions": 772,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43965/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43965",
+    "files_url": "https://github.com/huggingface/transformers/pull/44252/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44252",
     "labels": [],
-    "merged": true,
-    "number": 43965,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[docs] tokenizer summary",
-    "updated_at": "2026-02-17T18:17:25Z"
+    "merged": false,
+    "number": 44252,
+    "review_comments_count": 9,
+    "state": "open",
+    "title": "Timm unification continued",
+    "updated_at": "2026-02-26T13:35:44Z"
   },
   {
-    "additions": 72,
-    "author": "tohtana",
+    "additions": 1951,
+    "author": "Sai-Suraj-27",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes a ZeRO-3 checkpoint loading failure in Transformers\u2019 conversion-mapped loading path. In affected cases, many parameters are reported as missing and are actually not restored from checkpoint (they get reinitialized). `transfor\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? Model Page: https://huggingface.co/jinaai/jina-embeddings-v3 Model Paper: https://huggingface.co/papers/2409.10173 Downloads last month > **5.3M** Completes Part of https://github.com/huggingface/transformers/issues\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43964",
-    "created_at": "2026-02-12T22:01:48Z",
-    "deletions": 1,
+    "comments_count": 29,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44251",
+    "created_at": "2026-02-24T12:56:24Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43964/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43964",
-    "labels": [],
-    "merged": false,
-    "number": 43964,
-    "review_comments_count": 1,
+    "files_url": "https://github.com/huggingface/transformers/pull/44251/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44251",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 44251,
+    "review_comments_count": 74,
     "state": "closed",
-    "title": "Fix ZeRO-3 conversion-mapped checkpoint loading",
-    "updated_at": "2026-02-13T22:58:54Z"
+    "title": "Add `Jina-Embeddings-V3` Model",
+    "updated_at": "2026-03-19T10:07:57Z"
   },
   {
     "additions": 5,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors usages of `file.readlines()` to more Pythonic equivalents (`list(file)` or direct iteration) in core tokenization and utility files. **Key Improvements:** 1. **Memory Optimization:** Replaced `list(f.readl\u2026",
-    "changed_files": 4,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes https://github.com/huggingface/transformers/pull/43806#discussion_r2834269455. We removed `self.report_to == \"all\"` functionality by mistake. Adding it back !",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43963",
-    "created_at": "2026-02-12T21:16:47Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44250",
+    "created_at": "2026-02-24T12:38:21Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43963/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43963",
+    "files_url": "https://github.com/huggingface/transformers/pull/44250/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44250",
     "labels": [],
-    "merged": false,
-    "number": 43963,
+    "merged": true,
+    "number": 44250,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor redundant .readlines() with list()",
-    "updated_at": "2026-02-13T12:49:22Z"
+    "title": "fix regression report_to \"all\"",
+    "updated_at": "2026-02-24T12:55:06Z"
   },
   {
-    "additions": 59,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Refactors `tests/cli/test_serve.py` to use native Python `assert` statements instead of legacy `unittest.TestCase` assertion methods. This modernization aligns the CLI tests with `pytest` best practices, enabling: -\u2026",
+    "additions": 9,
+    "author": "Ryan-J-MAX",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR fix? This PR adds backward compatibility for the deprecated `grouped_entities` parameter in the `TokenClassificationPipeline`. ## Problem The `grouped_entities` parameter was deprecated in favor of `aggregation_strateg\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43962",
-    "created_at": "2026-02-12T18:02:43Z",
-    "deletions": 64,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44249",
+    "created_at": "2026-02-24T10:48:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43962/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43962",
+    "files_url": "https://github.com/huggingface/transformers/pull/44249/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44249",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43962,
-    "review_comments_count": 2,
+    "number": 44249,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor CLI tests using native pytest assertions",
-    "updated_at": "2026-02-13T12:49:11Z"
+    "title": "fix: add backward compatibility for grouped_entities parameter",
+    "updated_at": "2026-02-24T12:31:26Z"
   },
   {
     "additions": 12,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a common Python pitfall regarding **mutable default arguments**. In Python, default arguments are evaluated only once at function definition time. If a mutable object (like a `list`) is used as a default, that\u2026",
+    "author": "yonigozlan",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix backward compatibility with remote code for old processors not defining valid_kwargs (e.g. phi4) Cc @zucchini-nlp Fix `test_processor_override` for phi3v and phi4 in vllm @hmellor",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44245",
+    "created_at": "2026-02-23T21:47:19Z",
+    "deletions": 4,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44245/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44245",
+    "labels": [],
+    "merged": true,
+    "number": 44245,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix image processors `from_dict` backward compatibility with old remote code",
+    "updated_at": "2026-02-24T15:17:37Z"
+  },
+  {
+    "additions": 63,
+    "author": "thakoreh",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Fixes #44242 Load balancing loss was not being added when `output_router_logits=False` in Mixtral models. ## Changes - Fixed loss calculation to include load balancing even when router logits are not output - Added test case ##\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43961",
-    "created_at": "2026-02-12T17:31:19Z",
-    "deletions": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44243",
+    "created_at": "2026-02-23T21:27:09Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43961/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43961",
+    "files_url": "https://github.com/huggingface/transformers/pull/44243/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44243",
     "labels": [],
     "merged": false,
-    "number": 43961,
+    "number": 44243,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Replace mutable default arguments with None",
-    "updated_at": "2026-02-13T12:45:04Z"
+    "state": "closed",
+    "title": "fix: add load balancing loss when output_router_logits=False",
+    "updated_at": "2026-02-23T21:54:11Z"
   },
   {
-    "additions": 1194,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR adds the asynchronous batching feature to continuous batching (CB). Asynchronous batching, through the use of more VRAM and CUDA streams and events, greatly reduces the CPU overhead of preparing and updating batches by hi\u2026",
-    "changed_files": 11,
+    "additions": 9,
+    "author": "yushiran",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Adds missing `-> bool`, `-> int`, and `-> str | None` return type annotations to public utility functions in `utils/generic.py`, making them consistent with the newer `is_timm_config_dict` and `is_timm_local_checkpoint` function\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43960",
-    "created_at": "2026-02-12T17:20:38Z",
-    "deletions": 666,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44241",
+    "created_at": "2026-02-23T19:50:05Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43960/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43960",
+    "files_url": "https://github.com/huggingface/transformers/pull/44241/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44241",
     "labels": [],
     "merged": true,
-    "number": 43960,
-    "review_comments_count": 39,
-    "state": "closed",
-    "title": "[CB] [Major] Asynchronous batching",
-    "updated_at": "2026-02-23T10:11:28Z"
+    "number": 44241,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "fix: add missing return type annotations to type-checking utilities in generic.py",
+    "updated_at": "2026-02-24T13:27:11Z"
   },
   {
-    "additions": 32,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes potential `UnicodeDecodeError` on Windows (and other environments where the default encoding is not UTF-8) by enforcing `encoding=\"utf-8\"` in standard `open()` calls across the core library. ## Modifications A\u2026",
-    "changed_files": 10,
+    "additions": 2,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Makes sure `find_bad_commit` always return the result `dict`",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43959",
-    "created_at": "2026-02-12T17:00:15Z",
-    "deletions": 32,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44240",
+    "created_at": "2026-02-23T19:12:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43959/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43959",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 43959,
+    "files_url": "https://github.com/huggingface/transformers/pull/44240/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44240",
+    "labels": [],
+    "merged": true,
+    "number": 44240,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Enforce explicit UTF-8 encoding in core Library to prevent Windows crashes",
-    "updated_at": "2026-02-13T12:50:43Z"
+    "title": "Fix return value - fixes #44238",
+    "updated_at": "2026-02-24T13:02:59Z"
   },
   {
-    "additions": 2,
-    "author": "MekkCyber",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update the quantization docker file to add kernels",
-    "changed_files": 1,
+    "additions": 253,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "part 2 of refactoring the training docs adds new dedicated guide to callbacks and data collators todo: - [x] backlink to `## Next steps` in `trainer.md` once https://github.com/huggingface/transformers/pull/44185 is merged",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43958",
-    "created_at": "2026-02-12T16:31:42Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44239",
+    "created_at": "2026-02-23T18:54:55Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43958/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43958",
+    "files_url": "https://github.com/huggingface/transformers/pull/44239/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44239",
     "labels": [],
-    "merged": false,
-    "number": 43958,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44239,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "[kernels] update docker file",
-    "updated_at": "2026-03-25T10:31:24Z"
+    "title": "[docs] callbacks and collators",
+    "updated_at": "2026-02-24T22:12:46Z"
   },
   {
-    "additions": 8,
-    "author": "Cyrilvallez",
+    "additions": 1,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43913 After scanning the collection [here](https://huggingface.co/collections/Qwen/qwen3-vl), all models already have merged experts but need a transpose",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? small nit but will be misleading if not fixed",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43956",
-    "created_at": "2026-02-12T16:16:08Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44237",
+    "created_at": "2026-02-23T17:52:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43956/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43956",
+    "files_url": "https://github.com/huggingface/transformers/pull/44237/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44237",
     "labels": [],
-    "merged": false,
-    "number": 43956,
+    "merged": true,
+    "number": 44237,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix qwen3-vl-moe weight mapping",
-    "updated_at": "2026-02-12T17:29:22Z"
+    "title": "[mimi] nit",
+    "updated_at": "2026-02-24T15:43:55Z"
   },
   {
-    "additions": 5,
-    "author": "MekkCyber",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes kernel versions for new builds with torch 2.10",
-    "changed_files": 1,
+    "additions": 109,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43847 When using zero3 + from_config, the model was incorrectly initialized as we were not gathering the params. Added a test also. cc @tohtana",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43955",
-    "created_at": "2026-02-12T16:01:58Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44236",
+    "created_at": "2026-02-23T17:20:01Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43955/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43955",
+    "files_url": "https://github.com/huggingface/transformers/pull/44236/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44236",
     "labels": [],
     "merged": true,
-    "number": 43955,
+    "number": 44236,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[kernels] fix kernel versions ",
-    "updated_at": "2026-02-13T07:32:33Z"
+    "title": "fix zero3 init config",
+    "updated_at": "2026-02-27T11:36:19Z"
   },
   {
-    "additions": 3,
-    "author": "Rocketknight1",
+    "additions": 1,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "The LASR model uses `layerdrop`, which we forgot to disable in the tests. Since the tests only have 2 hidden layers, and the default layerdrop chance is `0.1`, this means there's a `0.1^2 = 1%` chance in any test that doesn't call `model.e\u2026",
+    "body_excerpt": "UPDATE TO: https://github.com/huggingface/transformers/pull/44179/changes Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43954",
-    "created_at": "2026-02-12T14:45:01Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44235",
+    "created_at": "2026-02-23T17:06:54Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43954/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43954",
+    "files_url": "https://github.com/huggingface/transformers/pull/44235/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44235",
     "labels": [],
     "merged": true,
-    "number": 43954,
+    "number": 44235,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix LASR test layerdrop issue",
-    "updated_at": "2026-02-12T17:03:42Z"
+    "title": "update fuyu tokenizer class",
+    "updated_at": "2026-02-23T17:36:22Z"
   },
   {
-    "additions": 1,
-    "author": "zucchini-nlp",
+    "additions": 249,
+    "author": "yonigozlan",
     "author_association": "MEMBER",
-    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43931, no transpose needed after standardizing the model impl to inherit from Qwen3-MoE",
-    "changed_files": 1,
+    "body_excerpt": "Cc @zucchini-nlp",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43953",
-    "created_at": "2026-02-12T14:43:59Z",
-    "deletions": 15,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44234",
+    "created_at": "2026-02-23T17:03:05Z",
+    "deletions": 55,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43953/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43953",
+    "files_url": "https://github.com/huggingface/transformers/pull/44234/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44234",
     "labels": [],
-    "merged": false,
-    "number": 43953,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44234,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix conversion mapping for Qwen3VL-MoE",
-    "updated_at": "2026-02-16T18:02:05Z"
+    "title": "Add processing tests for phi4 multimodal",
+    "updated_at": "2026-02-23T22:08:11Z"
   },
   {
-    "additions": 78,
-    "author": "zucchini-nlp",
+    "additions": 219,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5088 We shouldn't use `rope_deltas` from prev `generation/forward` call if this is a new generation from scratch. This is already correctly implemented in `compute_3d_\u2026",
-    "changed_files": 14,
+    "body_excerpt": "Extends `ty` coverage to `src/transformers/generation` - Added a dedicated type-check wrapper script: `utils/check_types.py`. - Updated `Makefile` to run `ty` checks through the wrapper in both `style` and `check-repo`. - merged all typing\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43952",
-    "created_at": "2026-02-12T14:34:52Z",
-    "deletions": 31,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44233",
+    "created_at": "2026-02-23T16:23:24Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43952/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43952",
+    "files_url": "https://github.com/huggingface/transformers/pull/44233/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44233",
     "labels": [],
     "merged": true,
-    "number": 43952,
-    "review_comments_count": 0,
+    "number": 44233,
+    "review_comments_count": 33,
     "state": "closed",
-    "title": "Fix qwen-vl position ids when generating several times",
-    "updated_at": "2026-02-12T16:22:39Z"
+    "title": "chore(typing): Add type checking to `src/transformers/generation`",
+    "updated_at": "2026-03-04T17:24:37Z"
   },
   {
-    "additions": 4,
-    "author": "lordaarush",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43883 After #42270, `all_tied_weights_keys` is initialized in `post_init()`, but remote models loaded with `trust_remote_code=True` don't always call `post_init()` properly, causing `AttributeError` when load\u2026",
-    "changed_files": 1,
+    "additions": 11,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? per https://code.claude.com/docs/en/claude-code-on-the-web#best-practices `CLAUDE.md` can alias directly into `AGENTS.md`",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43951",
-    "created_at": "2026-02-12T14:32:00Z",
-    "deletions": 0,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44232",
+    "created_at": "2026-02-23T16:10:15Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43951/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43951",
+    "files_url": "https://github.com/huggingface/transformers/pull/44232/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44232",
     "labels": [],
-    "merged": false,
-    "number": 43951,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44232,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix AttributeError for remote models with trust_remote_code=True",
-    "updated_at": "2026-02-12T15:39:38Z"
+    "title": "chore: added CLAUDE.md alias",
+    "updated_at": "2026-02-24T14:48:36Z"
   },
   {
-    "additions": 61,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a collection of spelling errors found throughout `src/transformers` in docstrings, comments, and user-facing error messages. ## Modifications Corrected the following typos across multiple files in `src/transfo\u2026",
-    "changed_files": 45,
+    "additions": 413,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43949",
-    "created_at": "2026-02-12T14:15:42Z",
-    "deletions": 61,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44231",
+    "created_at": "2026-02-23T15:45:47Z",
+    "deletions": 578,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43949/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43949",
+    "files_url": "https://github.com/huggingface/transformers/pull/44231/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44231",
     "labels": [],
-    "merged": true,
-    "number": 43949,
-    "review_comments_count": 5,
+    "merged": true,
+    "number": 44231,
+    "review_comments_count": 18,
     "state": "closed",
-    "title": "Fix typos in docstrings, comments, and error messages",
-    "updated_at": "2026-02-12T16:26:10Z"
+    "title": "[Performance] FP8 Grouped and Batched Matmuls",
+    "updated_at": "2026-03-11T08:51:02Z"
   },
   {
-    "additions": 147,
-    "author": "zucchini-nlp",
+    "additions": 4,
+    "author": "alvarobartt",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes `get_num_of_image_tokens` in idefics3 and adds a test. Aloong the way fixes a few more models Reported in https://github.com/vllm-project/vllm/pull/34358",
-    "changed_files": 25,
+    "body_excerpt": "# What does this PR do? This PR adds the missing backtick (`) on the `AnyToAnyPipeline.__call__` docstrings, as those were showing as in the screenshot below instead. <img width=\"1023\" height=\"400\" alt=\"image\" src=\"https://github.com/user-\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43948",
-    "created_at": "2026-02-12T13:52:37Z",
-    "deletions": 69,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44229",
+    "created_at": "2026-02-23T15:25:47Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43948/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43948",
+    "files_url": "https://github.com/huggingface/transformers/pull/44229/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44229",
     "labels": [],
     "merged": true,
-    "number": 43948,
+    "number": 44229,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `get_number_of_image_tokens`",
-    "updated_at": "2026-02-12T16:23:37Z"
+    "title": "Add missing backtick in `AnyToAnyPipeline.__call__` docstring",
+    "updated_at": "2026-02-23T19:21:08Z"
   },
   {
-    "additions": 42,
-    "author": "casinca",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hello, This is a complementary PR to #42769 (not yet pushed, at the time of writing) in order to fix #42754 Tests passed (at least the ones with a test.) <!-- Congratulations! You've made it this far! You're not qui\u2026",
-    "changed_files": 7,
+    "additions": 35,
+    "author": "JonoLF",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43947",
-    "created_at": "2026-02-12T13:20:24Z",
-    "deletions": 21,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44228",
+    "created_at": "2026-02-23T15:09:05Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43947/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43947",
+    "files_url": "https://github.com/huggingface/transformers/pull/44228/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44228",
     "labels": [],
-    "merged": true,
-    "number": 43947,
+    "merged": false,
+    "number": 44228,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: Better weight decay exclusion in `run_*_no\u2011trainer.py` examples",
-    "updated_at": "2026-02-12T16:24:43Z"
+    "state": "open",
+    "title": "[Quantisation] account for nested tensors from quantisers",
+    "updated_at": "2026-03-17T11:57:53Z"
   },
   {
-    "additions": 5,
-    "author": "ydshieh",
+    "additions": 21,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix docker files: some issues for `[dev-torch]` and `kernels`",
+    "body_excerpt": "This PR adds a logging message when infering the behavior of use async and fixes an error when evicting a graph from the graph buffer.",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43946",
-    "created_at": "2026-02-12T13:01:56Z",
-    "deletions": 5,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44227",
+    "created_at": "2026-02-23T14:53:53Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43946/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43946",
+    "files_url": "https://github.com/huggingface/transformers/pull/44227/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44227",
     "labels": [],
     "merged": true,
-    "number": 43946,
-    "review_comments_count": 0,
+    "number": 44227,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "Fix docker files",
-    "updated_at": "2026-02-12T13:11:21Z"
+    "title": "[CB] Small fixes",
+    "updated_at": "2026-03-03T13:40:10Z"
   },
   {
-    "additions": 25,
+    "additions": 86,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Some parts of the computation were not so exact. It should not change anything in general, but may improve perfs on constrained environments",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. It looks like some models (xlnet and kosmos2_5) and most audio models sometimes rely on the full previous input_ids to prepare inputs. Note that this cannot be compatible with restarting generation\u2026",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43945",
-    "created_at": "2026-02-12T12:56:19Z",
-    "deletions": 9,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44226",
+    "created_at": "2026-02-23T13:27:23Z",
+    "deletions": 66,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43945/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43945",
+    "files_url": "https://github.com/huggingface/transformers/pull/44226/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44226",
     "labels": [],
     "merged": true,
-    "number": 43945,
-    "review_comments_count": 0,
+    "number": 44226,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "Improve memory allocator during loading",
-    "updated_at": "2026-02-13T11:25:07Z"
+    "title": "[generate] Always pass full input_ids in `prepare_inputs_for_generation`",
+    "updated_at": "2026-02-24T10:45:49Z"
   },
   {
-    "additions": 4,
-    "author": "Rocketknight1",
+    "additions": 169,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "We have flaky test failures in `tests/models/qwen3_omni_moe/test_modeling_qwen3_omni_moe.py::Qwen3OmniMoeThinkerForConditionalGenerationModelTest::test_generate_continue_from_past_key_values`. The cause is that the logic in this test drops\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per title, we weren't running these test for some time because they were being filtered into `non-model` tests. But `non-model` doesn't run tests that are marked as generation, so I moved it back to `generation`\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43944",
-    "created_at": "2026-02-12T12:55:52Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44225",
+    "created_at": "2026-02-23T12:09:40Z",
+    "deletions": 270,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43944/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43944",
+    "files_url": "https://github.com/huggingface/transformers/pull/44225/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44225",
     "labels": [],
     "merged": true,
-    "number": 43944,
-    "review_comments_count": 2,
+    "number": 44225,
+    "review_comments_count": 18,
     "state": "closed",
-    "title": "Fix flaky test for multimodal LLMs",
-    "updated_at": "2026-02-12T13:30:17Z"
+    "title": "Fix generation integration tests",
+    "updated_at": "2026-02-25T09:19:39Z"
   },
   {
-    "additions": 6,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Replaces legacy `.format()` calls with f-strings in several model conversion scripts (`convert_*.py`). ## Modifications Used `flynt` to apply safe transformations to string literals in: - `src/transformers/models/im\u2026",
-    "changed_files": 6,
+    "additions": 5,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Small fixes after https://github.com/huggingface/transformers/pull/44130. See https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/8785954cca2fdca181de0b9567059471bcadb959/2026-02-21/ci_resu\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43943",
-    "created_at": "2026-02-12T11:49:23Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44224",
+    "created_at": "2026-02-23T10:48:19Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43943/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43943",
+    "files_url": "https://github.com/huggingface/transformers/pull/44224/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44224",
     "labels": [],
-    "merged": true,
-    "number": 43943,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 44224,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Modernize string formatting (f-strings) in conversion scripts",
-    "updated_at": "2026-02-12T14:20:49Z"
+    "title": "Small fixes",
+    "updated_at": "2026-02-24T10:06:14Z"
   },
   {
-    "additions": 20,
-    "author": "Cyrilvallez",
+    "additions": 1,
+    "author": "albertvillanova",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Re-add the overwritten test that was mistakenly removed in https://github.com/huggingface/transformers/pull/43916",
+    "body_excerpt": "Fix type of `TrainingArguments.logging_steps`. This PR makes a minor update to the `TrainingArguments` class, so `logging_steps` parameter accepts both integers and floats, rather than only floats. Note these are the expected types in the\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43942",
-    "created_at": "2026-02-12T11:20:08Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44223",
+    "created_at": "2026-02-23T08:50:04Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43942/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43942",
+    "files_url": "https://github.com/huggingface/transformers/pull/44223/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44223",
     "labels": [],
-    "merged": true,
-    "number": 43942,
+    "merged": false,
+    "number": 44223,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix bark test",
-    "updated_at": "2026-02-12T11:34:22Z"
+    "title": "Fix type of TrainingArguments.logging_steps",
+    "updated_at": "2026-02-23T09:08:18Z"
   },
   {
-    "additions": 113,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to file I/O operations in several `examples/pytorch/` scripts. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`). This causes crashes (`UnicodeDeco\u2026",
-    "changed_files": 9,
+    "additions": 1,
+    "author": "matisgagneux21",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - fix a typo in the Italian add-new-model guide: `docstirng` -> `docstring`. ## Why Small docs quality fix that avoids confusion for readers following the contribution guide.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43941",
-    "created_at": "2026-02-12T11:09:14Z",
-    "deletions": 48,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44219",
+    "created_at": "2026-02-23T00:43:59Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43941/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43941",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 43941,
-    "review_comments_count": 22,
+    "files_url": "https://github.com/huggingface/transformers/pull/44219/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44219",
+    "labels": [],
+    "merged": true,
+    "number": 44219,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix UnicodeDecodeError in PyTorch examples on Windows",
-    "updated_at": "2026-02-13T12:50:27Z"
+    "title": "Docs(it): fix typo in docstring wording",
+    "updated_at": "2026-02-23T15:04:51Z"
   },
   {
-    "additions": 23,
-    "author": "kaixuanliu",
+    "additions": 1,
+    "author": "matisgagneux21",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh This PR fixes failed case: `tests/models/exaone_moe/test_modeling_exaone_moe.py::ExaoneMoeIntegrationTest::test_model_logits`, pls help review, thx!",
+    "body_excerpt": "## Summary - fix a typo in the Italian migration guide install command: - `stentencepiece` -> `sentencepiece` ## Why The current command fails if copied as-is. This makes the installation snippet runnable for users reading the Italian docs.",
     "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43938",
-    "created_at": "2026-02-12T09:16:04Z",
-    "deletions": 6,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44218",
+    "created_at": "2026-02-23T00:32:49Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43938/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43938",
+    "files_url": "https://github.com/huggingface/transformers/pull/44218/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44218",
     "labels": [],
     "merged": true,
-    "number": 43938,
+    "number": 44218,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failed test case for exaone_moe model",
-    "updated_at": "2026-02-27T07:22:46Z"
+    "title": "Docs(it): fix typo in sentencepiece install command",
+    "updated_at": "2026-02-23T15:05:17Z"
   },
   {
-    "additions": 47,
-    "author": "kaixuanliu",
+    "additions": 1,
+    "author": "matisgagneux21",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh , pls help review, thx!",
-    "changed_files": 3,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "canonical",
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43936",
-    "created_at": "2026-02-12T08:34:03Z",
-    "deletions": 19,
+    "body_excerpt": "## Summary - Update the Italian migration guide to avoid pointing users to the deprecated `grouped_entities` flag. - Clarify that `aggregation_strategy` is the current option (with a note that it was previously `grouped_entities`). ## Why\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44217",
+    "created_at": "2026-02-23T00:10:48Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43936/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43936",
+    "files_url": "https://github.com/huggingface/transformers/pull/44217/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44217",
     "labels": [],
-    "merged": true,
-    "number": 43936,
-    "review_comments_count": 13,
+    "merged": false,
+    "number": 44217,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix failed unit tests for moonshine_streaming model",
-    "updated_at": "2026-03-06T07:39:09Z"
+    "title": "Docs(it): update deprecated grouped_entities reference",
+    "updated_at": "2026-02-23T14:56:19Z"
   },
   {
-    "additions": 1245,
-    "author": "MekkCyber",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds mlx quantization for mps devices leveraging the `kernels` library for pre-built kernels !!",
-    "changed_files": 13,
+    "additions": 13,
+    "author": "nikste",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Automated fix for #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook Fixes #44016 ## What does this PR do? This PR addresses issue #44016 by implementing the fix described in the issue. ---\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43934",
-    "created_at": "2026-02-12T07:59:02Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44216",
+    "created_at": "2026-02-22T23:40:56Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43934/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43934",
+    "files_url": "https://github.com/huggingface/transformers/pull/44216/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44216",
     "labels": [],
-    "merged": true,
-    "number": 43934,
-    "review_comments_count": 20,
+    "merged": false,
+    "number": 44216,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Quantization] Add metal quantization for MPS devices!",
-    "updated_at": "2026-02-27T13:28:31Z"
+    "title": "Fix #44016: Syntax error in Transformer section 3 (Transformers, what can they do?) notebook",
+    "updated_at": "2026-02-22T23:50:57Z"
   },
   {
-    "additions": 66,
-    "author": "quic-meetkuma",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds hardware backend called \"qaic\" which is for Qualcomm's AI Accelerator. The inclusion is similar to any other hardware backend in the Trainer. With this the user will be able to use Qualcomm's AI Acceler\u2026",
-    "changed_files": 9,
+    "additions": 187,
+    "author": "jmriosal",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? Add sequence classification capabilities to the family of Granite models (Granite, GraniteMoe, GraniteMoeHybrid, and GraniteMoeShared). Fixes #44214, #35720 ## Why The Granite models currently only have the base mod\u2026",
+    "changed_files": 17,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43933",
-    "created_at": "2026-02-12T06:14:52Z",
-    "deletions": 2,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43933/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43933",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44215",
+    "created_at": "2026-02-22T23:24:43Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44215/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44215",
     "labels": [],
     "merged": false,
-    "number": 43933,
+    "number": 44215,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Add sequence classification capability to Granite models",
+    "updated_at": "2026-02-24T20:39:37Z"
+  },
+  {
+    "additions": 70,
+    "author": "parthchopra07",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR refreshes the BEiT model documentation to align it with the current Transformers vision docs style and features. It updates the usage examples, clarifies configuration details, and improves the resources sec\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44213",
+    "created_at": "2026-02-22T18:32:16Z",
+    "deletions": 29,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44213/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44213",
+    "labels": [],
+    "merged": false,
+    "number": 44213,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Added support for qaic backend for Qualcomm's AI Accelerator",
-    "updated_at": "2026-02-17T16:53:38Z"
+    "title": "Update BEiT model card",
+    "updated_at": "2026-02-28T14:33:57Z"
   },
   {
-    "additions": 3,
-    "author": "quic-meetkuma",
+    "additions": 1,
+    "author": "alexandercarruthers",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? It fixes minor issue in the gather_full_tensor function. The existing implementation tries to perform all_gather across all the ranks of distributed training. But when we talk about the case of TP+DP then we only ne\u2026",
+    "body_excerpt": "# What does this PR do? Updates a broken link in the summarization guide. https://huggingface.co/docs/transformers/tasks/summarization https://huggingface.co/billsum/datasets results in a 404. New URL is https://huggingface.co/datasets/Fis\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43932",
-    "created_at": "2026-02-12T05:49:32Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44212",
+    "created_at": "2026-02-22T18:02:43Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43932/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43932",
+    "files_url": "https://github.com/huggingface/transformers/pull/44212/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44212",
     "labels": [],
     "merged": true,
-    "number": 43932,
+    "number": 44212,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Inclusion of process_group in the gather_full_tensor function in tensor_parallel.py",
-    "updated_at": "2026-02-13T14:29:24Z"
+    "title": "Update 404ing BillSum dataset URL on Summarization Task guide",
+    "updated_at": "2026-02-23T14:46:11Z"
   },
   {
-    "additions": 25,
-    "author": "cyyever",
+    "additions": 10,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Fix for #44016 The `grouped_entities` parameter in `TokenClassificationPipeline._sanitize_parameters` was removed without a deprecation period, causing a `TypeError` when users pass `grouped_entities=True` to the `pipeline()` call (as s\u2026",
+    "changed_files": 1,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44211",
+    "created_at": "2026-02-22T17:04:50Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44211/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44211",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44211,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Add backward compatibility for deprecated grouped_entities parameter",
+    "updated_at": "2026-02-23T16:26:02Z"
+  },
+  {
+    "additions": 1,
+    "author": "nightcityblade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? We can use PT 2.4.0 is_autocast_enabled changes to improve code.",
-    "changed_files": 15,
+    "body_excerpt": "Fixes #44206 ## Problem PR #43769 (\"Add Voxtral Realtime\") added a `center` parameter to `LasrFeatureExtractor.__call__()` and passed it to `_torch_extract_fbank_features()`, but that method does not accept it. This causes a `TypeError` on\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43930",
-    "created_at": "2026-02-12T02:04:11Z",
-    "deletions": 20,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44210",
+    "created_at": "2026-02-22T16:06:16Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43930/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43930",
+    "files_url": "https://github.com/huggingface/transformers/pull/44210/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44210",
     "labels": [],
-    "merged": true,
-    "number": 43930,
+    "merged": false,
+    "number": 44210,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve use of torch.is_autocast_enabled",
-    "updated_at": "2026-02-17T00:42:39Z"
+    "title": "fix(lasr): remove spurious center arg from _torch_extract_fbank_features call",
+    "updated_at": "2026-02-23T14:32:41Z"
   },
   {
-    "additions": 174,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "the second part to #43900. this pr focuses on customizing tokenizers: - training your own - passing a custom vocabulary to `vocab` and `merges` - subclassing `TokenizersBackend`",
-    "changed_files": 2,
+    "additions": 197,
+    "author": "paipeline",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? This PR fixes a critical bug in LayoutLMv2Tokenizer where passing `word_labels` for NER token classification tasks would crash with `AttributeError`. The issue was that `word_ids` and `sequence_ids` were being acce\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43929",
-    "created_at": "2026-02-11T23:20:18Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44209",
+    "created_at": "2026-02-22T14:37:25Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43929/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43929",
+    "files_url": "https://github.com/huggingface/transformers/pull/44209/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44209",
     "labels": [],
-    "merged": true,
-    "number": 43929,
+    "merged": false,
+    "number": 44209,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] customizing tokenizers",
-    "updated_at": "2026-02-17T17:15:32Z"
+    "title": "Fix LayoutLMv2Tokenizer NER crashes with word_labels",
+    "updated_at": "2026-02-23T10:30:26Z"
   },
   {
-    "additions": 48,
-    "author": "harshaljanjani",
+    "additions": 1,
+    "author": "ainergiz",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 Tests that created `DiaConfig` with custom token IDs (`eos_token_id=97` for a `vocab_size=100`) failed because saving then reloading the co\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR fixes a LASR regression introduced in `#43769` (released in `v5.2.0`). `LasrFeatureExtractor.__call__` passes `center` into `_torch_extract_fbank_features(...)`, but `_torch_extract_fbank_features` did not a\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43928",
-    "created_at": "2026-02-11T19:59:42Z",
-    "deletions": 15,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44207",
+    "created_at": "2026-02-21T20:56:49Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43928/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43928",
+    "files_url": "https://github.com/huggingface/transformers/pull/44207/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44207",
     "labels": [],
     "merged": true,
-    "number": 43928,
-    "review_comments_count": 3,
+    "number": 44207,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Preserve custom token IDs through DiaConfig save and load",
-    "updated_at": "2026-02-13T10:56:31Z"
+    "title": "Fix LASR feature extractor regression from invalid center argument",
+    "updated_at": "2026-02-23T10:01:35Z"
   },
   {
-    "additions": 24,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "nightcityblade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #44186 `LayoutLMv2Tokenizer.__init__` passes `only_label_first_subword` to `super().__init__()` but never stores it as `self.only_label_first_subword`. This causes an `AttributeError` when `word_labels` is pa\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43926",
-    "created_at": "2026-02-11T19:45:46Z",
-    "deletions": 105,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44204",
+    "created_at": "2026-02-21T16:06:46Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43926/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43926",
+    "files_url": "https://github.com/huggingface/transformers/pull/44204/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44204",
     "labels": [],
-    "merged": true,
-    "number": 43926,
+    "merged": false,
+    "number": 44204,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Deespeed] fix WeightConverter.convert() use",
-    "updated_at": "2026-02-16T14:07:30Z"
+    "title": "fix(layoutlmv2): store only_label_first_subword attribute in tokenizer",
+    "updated_at": "2026-02-23T10:30:19Z"
   },
   {
-    "additions": 5,
-    "author": "DimiChatzipavlis",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to `open()` calls in `.circleci/create_circleci_config.py` and `.circleci/parse_test_outputs.py`. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`)\u2026",
-    "changed_files": 2,
+    "additions": 22,
+    "author": "nightcityblade",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #44075 `_get_sgd()`, `_get_adagrad()`, and `_get_rmsprop()` in `trainer_optimizer.py` only returned `ctx.optimizer_kwargs` (which contains just `lr`), completely ignoring `ctx.optim_args`. This meant that parameters specif\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43925",
-    "created_at": "2026-02-11T19:18:51Z",
-    "deletions": 5,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44203",
+    "created_at": "2026-02-21T15:12:17Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43925/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43925",
+    "files_url": "https://github.com/huggingface/transformers/pull/44203/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44203",
     "labels": [],
     "merged": true,
-    "number": 43925,
+    "number": 44203,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add explicit utf-8 encoding to CircleCI scripts for Windows compatibility",
-    "updated_at": "2026-02-12T13:59:02Z"
+    "title": "fix(trainer): pass optim_args to SGD, Adagrad, and RMSprop optimizers",
+    "updated_at": "2026-02-25T16:04:20Z"
   },
   {
-    "additions": 576,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "WIP",
-    "changed_files": 48,
+    "additions": 63,
+    "author": "GS-GOAT",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? When `torch.compile` is used, [_ignore_bidirectional_mask_sdpa](cci:1://file:///c:/Users/BIT/Desktop/proj/gitrepo_clones/transformers/src/transformers/masking_utils.py:303:0-338:16) behaves differently than in eager\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43924",
-    "created_at": "2026-02-11T17:35:45Z",
-    "deletions": 726,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43924/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43924",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44202",
+    "created_at": "2026-02-21T13:45:15Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44202/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44202",
     "labels": [],
     "merged": false,
-    "number": 43924,
+    "number": 44202,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "[`Attn`] More old mask APIs",
-    "updated_at": "2026-02-11T22:34:32Z"
+    "state": "closed",
+    "title": "Fix: bidirectional mask skip when attention dropout is active (#44188)",
+    "updated_at": "2026-03-09T10:31:41Z"
   },
   {
-    "additions": 0,
-    "author": "Rocketknight1",
+    "additions": 18,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "The `no_grad` decorators created problems, especially when new tensors are created! Reverting that PR until we can narrow down exactly which areas are problematic.",
-    "changed_files": 9,
+    "body_excerpt": "# What does this PR do? main is currently failing with ``` FAILED tests/models/higgs_audio_v2/test_modeling_higgs_audio_v2.py::HiggsAudioV2ModelTest::test_generate_compilation_all_outputs - AssertionError: Lists differ: [torch.Size([2, 15,\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43923",
-    "created_at": "2026-02-11T16:39:30Z",
-    "deletions": 17,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44201",
+    "created_at": "2026-02-21T10:03:41Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43923/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43923",
+    "files_url": "https://github.com/huggingface/transformers/pull/44201/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44201",
     "labels": [],
     "merged": true,
-    "number": 43923,
-    "review_comments_count": 0,
+    "number": 44201,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Revert #43897",
-    "updated_at": "2026-02-11T17:21:07Z"
+    "title": "fix: HiggsAudioV2 cached decode inputs in compiled generation",
+    "updated_at": "2026-02-23T12:39:19Z"
   },
   {
-    "additions": 2,
-    "author": "DimiChatzipavlis",
+    "additions": 3,
+    "author": "pragnyanramtha",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a hard crash (`AttributeError`) in `src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py` caused by `collections.MutableMapping`, which was removed in Python 3.10. ## The Problem The script imp\u2026",
+    "body_excerpt": "Fixes #43782 The `weights_only` parameter passed to `from_pretrained()` was not being forwarded to `load_state_dict()` when loading `.bin` checkpoint files in the non-DeepSpeed code path. This caused `weights_only` to always default to `Tr\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43922",
-    "created_at": "2026-02-11T16:22:54Z",
-    "deletions": 2,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44200",
+    "created_at": "2026-02-21T06:24:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43922/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43922",
-    "labels": [],
-    "merged": true,
-    "number": 43922,
+    "files_url": "https://github.com/huggingface/transformers/pull/44200/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44200",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44200,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix AttributeError in OwlViT conversion script for Python 3.10+",
-    "updated_at": "2026-02-12T08:33:47Z"
+    "title": "fix: propagate `weights_only` param to `load_state_dict` in .bin loading path (#43782)",
+    "updated_at": "2026-02-23T14:20:12Z"
   },
   {
     "additions": 3,
-    "author": "svlandeg",
+    "author": "gowthamr-tech",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Hi \ud83d\udc4b Typer maintainer here. # What does this PR do? We used to offer `typer-slim` as a more light-weight package, which didn't depend on `rich` and `shellingham`. Unfortuntately, the way it was set up meant that we ran into issues with pac\u2026",
-    "changed_files": 2,
+    "body_excerpt": "## What does this PR do? This PR fixes an issue in `run_image_classification_no_trainer.py` where the script always loaded `dataset_name` (e.g., CIFAR10) even when `--train_dir` or `--validation_dir` was provided. Now, when local dataset d\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43921",
-    "created_at": "2026-02-11T15:54:04Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44199",
+    "created_at": "2026-02-21T06:03:29Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43921/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43921",
+    "files_url": "https://github.com/huggingface/transformers/pull/44199/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44199",
     "labels": [],
     "merged": true,
-    "number": 43921,
+    "number": 44199,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Depend on `typer` instead of `typer-slim`",
-    "updated_at": "2026-02-16T19:02:58Z"
+    "title": "Fix local dataset loading priority in run_image_classification_no_tra\u2026",
+    "updated_at": "2026-02-24T15:10:17Z"
   },
   {
-    "additions": 1,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This is a core file, and we cannot allow to change it without triggering everything - see https://github.com/huggingface/transformers/pull/43897#issuecomment-3885203477 as well",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 71,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43975 ## Summary This PR fixes: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detokenizes in v5 ## Changes ``` src/transformers/tokenization_utils_tokenizers.py | 12 ++++- tests/models/llama/test_tokenization_llama.py | 60\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43920",
-    "created_at": "2026-02-11T15:42:20Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44198",
+    "created_at": "2026-02-21T04:54:47Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43920/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43920",
+    "files_url": "https://github.com/huggingface/transformers/pull/44198/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44198",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44198,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix #43975: `deepseek-ai/deepseek-coder-6.7b-instruct` incorrectly detok",
+    "updated_at": "2026-02-23T14:10:47Z"
+  },
+  {
+    "additions": 37,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43937 ## Summary This PR fixes: [GLM-5] ValueError: GenerationConfig is invalid ## Changes ``` src/transformers/generation/configuration_utils.py | 13 +++++++++++- src/transformers/modeling_utils.py | 2 +- tests/generation/test_conf\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44197",
+    "created_at": "2026-02-21T04:47:32Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44197/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44197",
     "labels": [],
-    "merged": true,
-    "number": 43920,
+    "merged": false,
+    "number": 44197,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix #43937: [GLM-5] ValueError: GenerationConfig is invalid",
+    "updated_at": "2026-02-23T09:42:54Z"
+  },
+  {
+    "additions": 12,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43881 ## Summary This PR fixes: glm-4v-9b loading failed ## Changes ``` src/transformers/configuration_utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) ``` ## Testing Please review the changes carefully. T\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44196",
+    "created_at": "2026-02-21T04:41:02Z",
+    "deletions": 1,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44196/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44196",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44196,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Changes to cache_utils should trigger all tests all the time",
-    "updated_at": "2026-02-11T15:51:31Z"
+    "title": "Fix #43881: glm-4v-9b loading failed",
+    "updated_at": "2026-02-23T09:45:03Z"
   },
   {
-    "additions": 19,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR enables to use `sync_each_batch` argument when passing `gradient_accumulation_kwargs` in `AcceleratorConfig`. I'm also removing `adjust_scheduler` docstring as it is not used/enabled for now as we don't prep\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43919",
-    "created_at": "2026-02-11T15:37:05Z",
-    "deletions": 12,
+    "additions": 2,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44062 ## Summary This PR fixes: TypeError: tokenizers.AddedToken() got multiple values for keyword argument 'special' ## Changes ``` src/transformers/tokenization_utils_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44195",
+    "created_at": "2026-02-21T04:38:14Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43919/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43919",
-    "labels": [],
-    "merged": true,
-    "number": 43919,
+    "files_url": "https://github.com/huggingface/transformers/pull/44195/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44195",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44195,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix sync gradient",
-    "updated_at": "2026-02-13T14:38:09Z"
+    "title": "Fix #44062: TypeError: tokenizers.AddedToken() got multiple values for k",
+    "updated_at": "2026-02-23T14:10:30Z"
   },
   {
-    "additions": 2,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 16,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44075 ## Summary This PR fixes: Optimizer SGD args are not used ## Changes ``` src/transformers/trainer_optimizer.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) ``` ## Testing Please review the changes carefully. The fix\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43918",
-    "created_at": "2026-02-11T15:26:19Z",
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44194",
+    "created_at": "2026-02-21T04:35:53Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43918/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43918",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44194/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44194",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43918,
+    "number": 44194,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "foo",
-    "updated_at": "2026-02-11T15:46:51Z"
+    "title": "Fix #44075: Optimizer SGD args are not used",
+    "updated_at": "2026-02-23T14:10:20Z"
   },
   {
-    "additions": 1157,
-    "author": "IlyasMoutawwakil",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Integrates a library wide monkey patching API to fix #43284 and allow things like restructuring the expert weights and fusing the qkv projections. A concrete and complete example: <details> ```py from typing import\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43917",
-    "created_at": "2026-02-11T15:08:30Z",
-    "deletions": 3,
+    "additions": 2,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43986 ## Summary This PR fixes: Confusing crash when loading a video model through AutoProcessor without torchvision installed ## Changes ``` src/transformers/models/auto/video_processing_auto.py | 2 ++ 1 file changed, 2 insertions(\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44193",
+    "created_at": "2026-02-21T04:34:37Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43917/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43917",
-    "labels": [],
-    "merged": true,
-    "number": 43917,
-    "review_comments_count": 38,
+    "files_url": "https://github.com/huggingface/transformers/pull/44193/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44193",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44193,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Model patching API",
-    "updated_at": "2026-03-02T10:10:50Z"
+    "title": "Fix #43986: Confusing crash when loading a video model through AutoProce",
+    "updated_at": "2026-02-23T09:46:15Z"
   },
   {
-    "additions": 616,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. See discussion in https://github.com/huggingface/transformers/issues/43885",
-    "changed_files": 296,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43916",
-    "created_at": "2026-02-11T14:36:44Z",
-    "deletions": 721,
+    "additions": 3,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44079 ## Summary This PR fixes: `ModelOutput` keys aren't correctly assigned if key was previously None ## Changes ``` src/transformers/utils/generic.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) ``` ## Testing Please r\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44192",
+    "created_at": "2026-02-21T04:33:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43916/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43916",
-    "labels": [],
-    "merged": true,
-    "number": 43916,
-    "review_comments_count": 2,
+    "files_url": "https://github.com/huggingface/transformers/pull/44192/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44192",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44192,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Harmonize `input_embeds` to `inputs_embeds` everywhere",
-    "updated_at": "2026-02-11T16:03:56Z"
+    "title": "Fix #44079: `ModelOutput` keys aren't correctly assigned if key was prev",
+    "updated_at": "2026-02-23T14:10:14Z"
   },
   {
-    "additions": 272,
-    "author": "merveenoyan",
-    "author_association": "MEMBER",
-    "body_excerpt": "notebook to converted models e2e inference: https://colab.research.google.com/drive/1g-Vc-Zvjy_STNEUbWJhYDTpFyT7o6TGl?usp=sharing models: https://huggingface.co/merve/PaddleOCR-VL-hf https://huggingface.co/merve/PaddleOCR-VL-1.5-hf @molbap",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43915",
-    "created_at": "2026-02-11T14:30:59Z",
-    "deletions": 0,
+    "additions": 95,
+    "author": "danielalanbates",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44155 ## Summary This PR fixes: [AudioFlamingo3] Batched inference produces incorrect results due to embedding/token leak between tracks ## Changes ``` .../audioflamingo3/modeling_audioflamingo3.py | 51 +++++++++++++++++++--- .../au\u2026",
+    "changed_files": 3,
+    "cluster_id": "cluster-44191-8",
+    "cluster_ids": [
+      "cluster-44191-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44191",
+    "created_at": "2026-02-21T04:32:30Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43915/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43915",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44191/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44191",
+    "labels": [
+      "Audio"
+    ],
     "merged": false,
-    "number": 43915,
+    "number": 44191,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "add PaddleOCR-VL conversion",
-    "updated_at": "2026-02-11T14:40:51Z"
+    "state": "closed",
+    "title": "Fix #44155: [AudioFlamingo3] Batched inference produces incorrect result",
+    "updated_at": "2026-03-19T16:16:17Z"
   },
   {
-    "additions": 2409,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR reorder all methods from Trainer. No modifications was done to the methods itself. With this final PR, all the major changes should be done and we can focus on simplifying the core logic + testing. Reorderin\u2026",
+    "additions": 3,
+    "author": "excepshenal",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Under fp16_full_eval or bf16_full_eval, still don't move model to device if using another dist train backend. This is causing bugs with FSDP2 + bf16_full_eval. The dist train backend would still be in charge of movi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43914",
-    "created_at": "2026-02-11T13:36:39Z",
-    "deletions": 2323,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44189",
+    "created_at": "2026-02-21T00:06:16Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43914/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43914",
+    "files_url": "https://github.com/huggingface/transformers/pull/44189/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44189",
     "labels": [],
-    "merged": true,
-    "number": 43914,
+    "merged": false,
+    "number": 44189,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Reorder Trainer methods",
-    "updated_at": "2026-02-13T17:07:59Z"
+    "state": "open",
+    "title": "fix: don't move model to device under other dist train backends",
+    "updated_at": "2026-02-21T00:06:16Z"
   },
   {
-    "additions": 38,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/43299#issuecomment-3849688073",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issues were identified and fixed in this PR: \u2192 The NER/token classification issue and the downstream bug uncovered in the batched preprocessing use case with `LayoutLMv2Tokenizer`. \u2192 **Reasoning:** T\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43913",
-    "created_at": "2026-02-11T12:44:05Z",
-    "deletions": 10,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44187",
+    "created_at": "2026-02-20T20:02:04Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43913/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43913",
+    "files_url": "https://github.com/huggingface/transformers/pull/44187/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44187",
     "labels": [],
-    "merged": false,
-    "number": 43913,
+    "merged": true,
+    "number": 44187,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add a sentinel mechanism in Transpose and fix qwen3_vl_moe weight mapping",
-    "updated_at": "2026-02-16T15:53:09Z"
+    "title": "fix(models): Fix LayoutLMv2 NER crash and broken batched truncation/padding",
+    "updated_at": "2026-02-23T10:30:51Z"
   },
   {
-    "additions": 790,
-    "author": "JaredforReal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module class - fix mlp layers mismatch - implement Attention.forward() # Current st\u2026",
-    "changed_files": 5,
+    "additions": 361,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "part 1 of refactoring the `Trainer` docs - restructure the `toctree` a bit to accommodate new sections and docs - slim down `trainer.md` to be a clearer entry point (will expand the `## Next steps` section as we continue for better navigat\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43912",
-    "created_at": "2026-02-11T12:37:41Z",
-    "deletions": 348,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44185",
+    "created_at": "2026-02-20T19:25:07Z",
+    "deletions": 578,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43912/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43912",
+    "files_url": "https://github.com/huggingface/transformers/pull/44185/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44185",
     "labels": [],
     "merged": true,
-    "number": 43912,
-    "review_comments_count": 18,
+    "number": 44185,
+    "review_comments_count": 19,
     "state": "closed",
-    "title": "[fix][wip] GlmMoeDsa: try implement DSA",
-    "updated_at": "2026-03-04T16:30:36Z"
+    "title": "[docs] trainer part 1",
+    "updated_at": "2026-02-24T21:18:42Z"
   },
   {
-    "additions": 1,
-    "author": "pavel-esir",
+    "additions": 191,
+    "author": "mariam851",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Without this like `AutoTokenizer.from_pretrained(...)` does not create `LlamaTokenizer` object. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the ca\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This PR implements the initial architecture for CircuitGPT (based on OpenAI's research), as discussed in #44121. Key implementations: SparseLinear: Custom layer with Top-K weight sparsity logic. CircuitGpt Components: Attention, MLP, and C\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43911",
-    "created_at": "2026-02-11T10:53:54Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44184",
+    "created_at": "2026-02-20T16:58:27Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43911/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43911",
+    "files_url": "https://github.com/huggingface/transformers/pull/44184/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44184",
     "labels": [],
     "merged": false,
-    "number": 43911,
+    "number": 44184,
     "review_comments_count": 0,
     "state": "open",
-    "title": "add Llama to mapping names in tokenization_auto.py",
-    "updated_at": "2026-02-18T10:58:59Z"
+    "title": "feat: add OpenAI CircuitGPT core architecture and sparse linear layers",
+    "updated_at": "2026-02-20T17:18:44Z"
   },
   {
-    "additions": 17,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh , pls help review, thx!",
-    "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43910",
-    "created_at": "2026-02-11T09:43:08Z",
-    "deletions": 2,
+    "additions": 1,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Our code has some references to the `grouped_entities` arg to the token classification pipeline, but this is no longer usable. This PR cleans them up entirely! Fixes #44016",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44182",
+    "created_at": "2026-02-20T15:28:26Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43910/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43910",
+    "files_url": "https://github.com/huggingface/transformers/pull/44182/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44182",
     "labels": [],
     "merged": true,
-    "number": 43910,
-    "review_comments_count": 3,
+    "number": 44182,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Update expected output for Jais2 model tests",
-    "updated_at": "2026-02-27T08:28:06Z"
+    "title": "Remove refs to grouped_entities",
+    "updated_at": "2026-02-24T16:07:24Z"
   },
   {
-    "additions": 13,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh , pls help review, thx!",
-    "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43907",
-    "created_at": "2026-02-11T08:30:31Z",
-    "deletions": 36,
+    "additions": 898,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title! Follow up of https://github.com/huggingface/transformers/pull/44130 and https://github.com/huggingface/transformers/pull/44226. Finally remove the `cache_position` everywhere (not ALL models, but a\u2026",
+    "changed_files": 169,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44181",
+    "created_at": "2026-02-20T15:24:39Z",
+    "deletions": 2698,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43907/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43907",
+    "files_url": "https://github.com/huggingface/transformers/pull/44181/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44181",
     "labels": [],
     "merged": true,
-    "number": 43907,
-    "review_comments_count": 0,
+    "number": 44181,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "update glm image model expected out for tests",
-    "updated_at": "2026-02-27T07:21:14Z"
+    "title": "[core] \ud83d\udea8 Completely remove cache positions",
+    "updated_at": "2026-03-04T18:08:42Z"
   },
   {
-    "additions": 31,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Per the title. Enable the corresponding tests and re-enable the tests that were skipped before.",
-    "changed_files": 4,
+    "additions": 28,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes a flaky test in IdeficsForVisionText2TextTest::test_generate_continue_from_inputs_embeds. The flakiness can be reproduced with: ``` pytest -q -p no:rerunfailures --flake-finder --flake-runs=20 \\ tests/models/i\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43905",
-    "created_at": "2026-02-11T05:31:42Z",
-    "deletions": 23,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44180",
+    "created_at": "2026-02-20T14:30:46Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43905/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43905",
+    "files_url": "https://github.com/huggingface/transformers/pull/44180/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44180",
     "labels": [],
     "merged": true,
-    "number": 43905,
-    "review_comments_count": 2,
+    "number": 44180,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "XPU now supports backward for the FA2 fixed path",
-    "updated_at": "2026-02-26T10:48:28Z"
+    "title": "fix(flaky): idefics generate cache flake",
+    "updated_at": "2026-02-26T16:18:18Z"
   },
   {
-    "additions": 3,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? removes two unused dependencies.",
-    "changed_files": 3,
+    "additions": 27,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "Models with incorrect tokenizer_class in tokenization_config.json that should use TokenziersBackend",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43904",
-    "created_at": "2026-02-11T02:48:18Z",
-    "deletions": 7,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44179",
+    "created_at": "2026-02-20T13:51:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43904/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43904",
+    "files_url": "https://github.com/huggingface/transformers/pull/44179/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44179",
     "labels": [],
     "merged": true,
-    "number": 43904,
-    "review_comments_count": 2,
+    "number": 44179,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Remove unused dependencies",
-    "updated_at": "2026-02-18T01:04:31Z"
+    "title": "Models with incorrect tokenizer_class in tokenization_config.json tha\u2026",
+    "updated_at": "2026-02-23T08:33:13Z"
   },
   {
-    "additions": 0,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 2940,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Re-opening https://github.com/huggingface/transformers/pull/37868 TODO - [x] recompute expected outputs - [x] passthrough code given new conventions - [x] check for unused code paths / configuration parameters Origi\u2026",
+    "changed_files": 27,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43903",
-    "created_at": "2026-02-11T01:44:06Z",
-    "deletions": 3,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44178",
+    "created_at": "2026-02-20T12:36:21Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43903/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43903",
+    "files_url": "https://github.com/huggingface/transformers/pull/44178/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44178",
     "labels": [],
-    "merged": true,
-    "number": 43903,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: TextClassificationPipeline docs mentioning deprecated return_all_scores",
-    "updated_at": "2026-02-11T17:13:26Z"
+    "merged": false,
+    "number": 44178,
+    "review_comments_count": 8,
+    "state": "open",
+    "title": "Add xcodec2 model",
+    "updated_at": "2026-03-19T12:08:27Z"
   },
   {
-    "additions": 23,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes and removes more outdated documentation contents <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the t\u2026",
-    "changed_files": 15,
+    "additions": 41,
+    "author": "vasqu",
+    "author_association": "MEMBER",
+    "body_excerpt": "As per title, spiritual successor to #44081 Why? Because as is - Only defaults for fa2/fa3, not on other requested kernels - Limits implementations to one kernel/implementation while I suspect that there will be multiple viable versions (i\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43902",
-    "created_at": "2026-02-11T01:39:07Z",
-    "deletions": 164,
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44177",
+    "created_at": "2026-02-20T12:13:30Z",
+    "deletions": 71,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43902/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43902",
+    "files_url": "https://github.com/huggingface/transformers/pull/44177/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44177",
     "labels": [],
     "merged": true,
-    "number": 43902,
-    "review_comments_count": 0,
+    "number": 44177,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix old tech stack in doc",
-    "updated_at": "2026-02-11T13:23:56Z"
+    "title": "[`Flash Attn`] Enable compatible implementations",
+    "updated_at": "2026-02-20T12:43:35Z"
   },
   {
-    "additions": 165,
-    "author": "stevhliu",
+    "additions": 271,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "refactors tokenizer docs for v5: - describes new unified tokenization backend architecture and how it works - empty tokenizer initialization with model-specific tokenizer - update structure to mirror developer workflow (load -> encode/deco\u2026",
-    "changed_files": 4,
+    "body_excerpt": "Our kernel loading is incompatible with the original packages as they do not expose the same import structure: - Kernels seem to expose things in the init (and not in the original path) - Original packages seem to expose only within their\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43900",
-    "created_at": "2026-02-11T00:42:19Z",
-    "deletions": 314,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44176",
+    "created_at": "2026-02-20T11:36:01Z",
+    "deletions": 124,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43900/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43900",
+    "files_url": "https://github.com/huggingface/transformers/pull/44176/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44176",
     "labels": [],
     "merged": true,
-    "number": 43900,
-    "review_comments_count": 9,
+    "number": 44176,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[docs] refactor tokenizer docs",
-    "updated_at": "2026-02-17T17:50:01Z"
+    "title": "[`Mamba`] Fix kernel loading",
+    "updated_at": "2026-02-20T16:19:06Z"
   },
   {
-    "additions": 5,
+    "additions": 1,
     "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": null,
+    "body_excerpt": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC) for vllm: https://buildkite.com/vllm/ci/builds/52260/steps/canvas?sid=019c76ad-c8f2-4e59-a2f4-5f3b5bbc204c&tab=output",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43898",
-    "created_at": "2026-02-10T17:37:34Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44175",
+    "created_at": "2026-02-20T11:00:18Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43898/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43898",
+    "files_url": "https://github.com/huggingface/transformers/pull/44175/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44175",
     "labels": [],
-    "merged": true,
-    "number": 43898,
+    "merged": false,
+    "number": 44175,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "adding BC for custom toks accessing slow tok attrs deprecated in v5",
-    "updated_at": "2026-02-10T21:21:33Z"
+    "title": "add jamba tokenizer mapping to PreTrainedTokenizerFast (v4/v5 BC)",
+    "updated_at": "2026-02-20T16:19:31Z"
   },
   {
-    "additions": 17,
-    "author": "Rocketknight1",
+    "additions": 1367,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "Although our cache update methods are usually used in inference, when grad is disabled anyway, there seem to be some edge cases where they cause problems with compilation and gradient computation. Since we never want to propagate gradient\u2026",
-    "changed_files": 9,
+    "body_excerpt": "This draft expands `utils/check_modeling_structure.py` into a rule-driven linter for model code, with new checks and tests, while keeping runtime very low. Key features: - The checker is intentionally AST-only (no heavy imports/execution),\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43897",
-    "created_at": "2026-02-10T17:15:15Z",
-    "deletions": 0,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44174",
+    "created_at": "2026-02-20T10:38:11Z",
+    "deletions": 24,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43897/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43897",
+    "files_url": "https://github.com/huggingface/transformers/pull/44174/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44174",
     "labels": [],
     "merged": true,
-    "number": 43897,
-    "review_comments_count": 0,
+    "number": 44174,
+    "review_comments_count": 38,
     "state": "closed",
-    "title": "Decorate cache updates with no_grad, just in case",
-    "updated_at": "2026-02-11T15:31:40Z"
+    "title": "Expand model-structure lint rules with a fast AST-based, ruff-like framework",
+    "updated_at": "2026-03-12T06:42:21Z"
   },
   {
-    "additions": 427,
-    "author": "ebezzam",
+    "additions": 20,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As discussed in https://github.com/huggingface/transformers/pull/43820#pullrequestreview-3780031785, expected outputs of DAC need to be recomputed Reasoning: outputs were changed with https://github.com/huggingface/\u2026",
+    "body_excerpt": "Fixes flaky GLM OCR generation behavior when 2D `position_ids` are passed explicitly. Reproducible locally with: ``` pytest tests/models/glm_ocr/test_modeling_glm_ocr.py::GlmOcrModelTest::test_generate_with_and_without_position_ids --flake\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43896",
-    "created_at": "2026-02-10T16:49:32Z",
-    "deletions": 304,
+    "comments_count": 14,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44173",
+    "created_at": "2026-02-20T09:28:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43896/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43896",
+    "files_url": "https://github.com/huggingface/transformers/pull/44173/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44173",
     "labels": [],
     "merged": true,
-    "number": 43896,
-    "review_comments_count": 1,
+    "number": 44173,
+    "review_comments_count": 10,
     "state": "closed",
-    "title": "Fix expected DAC outputs due to (old) change in CI settings.",
-    "updated_at": "2026-02-10T17:47:59Z"
+    "title": "fix(flaky): `test_generate_with_and_without_position_ids` in GLM ORC",
+    "updated_at": "2026-02-20T19:06:19Z"
   },
   {
-    "additions": 8,
-    "author": "SunMarc",
+    "additions": 2,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR reverts `place_model_on_device` back to a property. I prefer not introducing new args for now.",
-    "changed_files": 2,
+    "body_excerpt": "## Summary I've noticed `test_synthidtext_watermark_processor_distributional_convergence_*` was our slowest tests in CircleCI This PR speeds up the slowest SynthID distributional convergence tests by optimizing a hot path in SynthIDTextWat\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43895",
-    "created_at": "2026-02-10T15:06:36Z",
-    "deletions": 7,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44172",
+    "created_at": "2026-02-20T08:59:30Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43895/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43895",
+    "files_url": "https://github.com/huggingface/transformers/pull/44172/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44172",
     "labels": [],
     "merged": true,
-    "number": 43895,
+    "number": 44172,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "revert place_model_on_device to property",
-    "updated_at": "2026-02-11T10:37:06Z"
+    "title": "perf: Optimize SynthID logits processor batch index construction",
+    "updated_at": "2026-02-27T09:32:43Z"
   },
   {
-    "additions": 165,
-    "author": "JaredforReal",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module - fix mlp layers mismatch <!-- Congratulations! You've made it this far! You\u2026",
-    "changed_files": 3,
+    "additions": 2449,
+    "author": "lmaksym",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds TDT decoder support for Parakeet ASR models, extending the existing CTC-only implementation. It incorporates the initial TDT integration work from [#41545](https://github.com/huggingface/transformers/pu\u2026",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43894",
-    "created_at": "2026-02-10T14:56:42Z",
-    "deletions": 124,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44171",
+    "created_at": "2026-02-20T08:44:46Z",
+    "deletions": 294,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43894/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43894",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44171/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44171",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
     "merged": false,
-    "number": 43894,
-    "review_comments_count": 8,
-    "state": "closed",
-    "title": "[fix] GlmMoeDsa model",
-    "updated_at": "2026-03-04T16:30:34Z"
+    "number": 44171,
+    "review_comments_count": 66,
+    "state": "open",
+    "title": "Parakeet tdt",
+    "updated_at": "2026-03-26T18:00:35Z"
   },
   {
-    "additions": 55,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes #43854. Long term we should probably have it in the post-init to share them @Cyrilvallez I am lazy today",
-    "changed_files": 20,
+    "additions": 74,
+    "author": "veeceey",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Adds `GitForCausalLM` to `MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES` so GIT can be used with the `visual-question-answering` pipeline - Filters tokenizer outputs in VQA pipeline `preprocess` to only pass keys accepted\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43893",
-    "created_at": "2026-02-10T14:16:10Z",
-    "deletions": 47,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44170",
+    "created_at": "2026-02-20T08:28:05Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43893/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43893",
+    "files_url": "https://github.com/huggingface/transformers/pull/44170/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44170",
     "labels": [],
-    "merged": true,
-    "number": 43893,
-    "review_comments_count": 5,
+    "merged": false,
+    "number": 44170,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Move `_keys_to_ignore_on_load_missing` for now",
-    "updated_at": "2026-02-11T13:47:43Z"
+    "title": "Add GIT model support in VQA pipeline",
+    "updated_at": "2026-02-20T09:34:31Z"
   },
   {
-    "additions": 1,
-    "author": "zucchini-nlp",
+    "additions": 415,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
+    "body_excerpt": "Initial ty integration. To avoid a gigantic, risky patch, let's start with a baby step where we add the tooling to make repo-check and activate it on a subset of the repo. That gives us a human-readable patch, and allows us to get conforta\u2026",
+    "changed_files": 25,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43892",
-    "created_at": "2026-02-10T13:07:29Z",
-    "deletions": 2,
+    "comments_count": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44167",
+    "created_at": "2026-02-20T07:39:44Z",
+    "deletions": 210,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43892/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43892",
+    "files_url": "https://github.com/huggingface/transformers/pull/44167/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44167",
     "labels": [],
     "merged": true,
-    "number": 43892,
-    "review_comments_count": 0,
+    "number": 44167,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Update hub metadata",
-    "updated_at": "2026-02-10T15:53:51Z"
+    "title": "chore(typing): initial ty integration",
+    "updated_at": "2026-02-20T19:08:09Z"
   },
   {
-    "additions": 36,
-    "author": "Mercury0226",
-    "author_association": "NONE",
-    "body_excerpt": "Closed PR. Test-only proposal for #7715.",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43891",
-    "created_at": "2026-02-10T12:04:08Z",
-    "deletions": 1,
+    "additions": 73,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "The CI does not output useful info on this flaky test - `tests.models.olmo.test_modeling_olmo.OlmoModelTest testMethod=test_generate_with_static_cache` and makes it harder to determine the root problem when not reproducible locally. This p\u2026",
+    "changed_files": 8,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44166",
+    "created_at": "2026-02-20T07:20:15Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43891/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43891",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 43891,
-    "review_comments_count": 0,
+    "files_url": "https://github.com/huggingface/transformers/pull/44166/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44166",
+    "labels": [],
+    "merged": true,
+    "number": 44166,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "tests: extend RAG retriever smoke coverage (#7715)",
-    "updated_at": "2026-02-10T13:26:43Z"
+    "title": "Improve `has_similar_generate_outputs` assertions",
+    "updated_at": "2026-02-27T08:26:13Z"
   },
   {
-    "additions": 54,
-    "author": "Mercury0226",
-    "author_association": "NONE",
-    "body_excerpt": "Closed PR. Test-only proposal for #6045.",
+    "additions": 29,
+    "author": "alexmalyshev",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "With Python 3.14 and PEP 649, you can no longer expect `cls.__dict__[\"__annotations__\"]` to contain annotations for the exact class, it will be loaded lazily and can make it seem like the class doesn't have any annotations. The recommended\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43890",
-    "created_at": "2026-02-10T11:48:59Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44165",
+    "created_at": "2026-02-20T04:47:07Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43890/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43890",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44165/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44165",
+    "labels": [],
     "merged": false,
-    "number": 43890,
+    "number": 44165,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "tests: add BART memory regression guard test (#6045)",
-    "updated_at": "2026-02-10T13:26:45Z"
+    "title": "Fix how PreTrainedModel checks annotations on Python 3.14+",
+    "updated_at": "2026-03-20T17:31:11Z"
   },
   {
-    "additions": 24,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title",
+    "additions": 7,
+    "author": "lhallee",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes: https://github.com/huggingface/transformers/issues/44162 @ArthurZucker @Cyrilvallez",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43889",
-    "created_at": "2026-02-10T11:25:01Z",
-    "deletions": 18,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44163",
+    "created_at": "2026-02-19T21:44:25Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43889/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43889",
+    "files_url": "https://github.com/huggingface/transformers/pull/44163/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44163",
     "labels": [],
     "merged": true,
-    "number": 43889,
+    "number": 44163,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Jamba`] Fallback to slow path and warn instead of error out",
-    "updated_at": "2026-02-10T12:35:49Z"
+    "title": "ESM2 attention_mask and token_dropout fix",
+    "updated_at": "2026-02-20T15:17:31Z"
   },
   {
-    "additions": 2074,
-    "author": "bhargav-patel-29",
+    "additions": 379,
+    "author": "cogniera",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for **Param-2-17B-MoE-A2.4B**, a large-scale Mixture-of-Experts (MoE) causal language model. Param-2-17B-MoE-A2.4B uses a **Hybrid Dense + MoE architecture** with 17B total parameters while acti\u2026",
-    "changed_files": 8,
+    "body_excerpt": "What does this PR do? This PR refactors the LongT5 model to use the @capture_outputs and @can_return_tuple decorators for standardized output handling across the model stack. The refactor removes manual handling of: output_attentions outpu\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43888",
-    "created_at": "2026-02-10T11:02:13Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44161",
+    "created_at": "2026-02-19T20:46:49Z",
+    "deletions": 170,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43888/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43888",
+    "files_url": "https://github.com/huggingface/transformers/pull/44161/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44161",
     "labels": [],
     "merged": false,
-    "number": 43888,
+    "number": 44161,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Support for BharatGen's Param2MoE model architecture",
-    "updated_at": "2026-02-23T07:28:02Z"
+    "title": "Refactor LongT5 to use @capture_outputs and @can_return_tuple decorators for unified output handling (Fixes #43979)",
+    "updated_at": "2026-02-20T17:28:12Z"
   },
   {
-    "additions": 12,
-    "author": "eustlb",
+    "additions": 2104,
+    "author": "molbap",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? MistralCommonBackend does not implement `added_tokens_decoder` so any call to PreTrainedTokenizerBase's `__repr__ `fails.",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Hey yall, I started porting the pi0 model so Transformers can be a backend for VLAs directly. I checked it against LeRobot on fix/lerobot_openpi: outputs seem to match and for sure lerobot/pi0_base loads cleanly (no\u2026",
+    "changed_files": 22,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43887",
-    "created_at": "2026-02-10T10:55:08Z",
-    "deletions": 0,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44160",
+    "created_at": "2026-02-19T17:16:29Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43887/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43887",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44160/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44160",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 43887,
-    "review_comments_count": 0,
+    "number": 44160,
+    "review_comments_count": 32,
     "state": "closed",
-    "title": "[MistralCommonBackend] fix loading proc",
-    "updated_at": "2026-02-10T12:32:24Z"
+    "title": "Add model lerobot PI0 to transformers",
+    "updated_at": "2026-03-16T10:23:14Z"
   },
   {
-    "additions": 20,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43878 After the refactor we started saving `out_features` and `stage_names` in timm backbone config, because it now also inherits from `BackboneConfigMixin`. But the modeling code wo\u2026",
-    "changed_files": 3,
+    "additions": 67,
+    "author": "samuelleecong",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary Closes #28103 - Refactor `OwlViTAttention` to use `ALL_ATTENTION_FUNCTIONS` for dynamic attention backend dispatch (same pattern as CLIP) - Add `eager_attention_forward` standalone function with the standardized interface - Resh\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43886",
-    "created_at": "2026-02-10T10:10:09Z",
-    "deletions": 9,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43886/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43886",
-    "labels": [],
-    "merged": true,
-    "number": 43886,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Timm backbone saves and loads `out_features`",
-    "updated_at": "2026-02-12T15:57:04Z"
-  },
-  {
-    "additions": 2,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43884",
-    "created_at": "2026-02-10T09:55:40Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44159",
+    "created_at": "2026-02-19T16:31:44Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43884/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43884",
+    "files_url": "https://github.com/huggingface/transformers/pull/44159/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44159",
     "labels": [],
     "merged": false,
-    "number": 43884,
+    "number": 44159,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "don't merge check workflow",
-    "updated_at": "2026-02-13T02:03:43Z"
+    "state": "open",
+    "title": "Add SDPA and Flash Attention support for OWL-ViT",
+    "updated_at": "2026-02-24T12:53:10Z"
   },
   {
-    "additions": 3,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Resolves https://github.com/huggingface/transformers/pull/42564#issuecomment-3874606093 #42564 updated `get_image_features` for Llama4, but it erroneously started using `pooler_output` instead of the previous `last_\u2026",
-    "changed_files": 1,
+    "additions": 141,
+    "author": "leopold-tzafon",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# Fix issue where `use_cache=False`, corrupts model Qwen3vl output. Tested with: ``` import torch from transformers import Qwen3VLForConditionalGeneration, AutoProcessor MODEL_NAME = \"Qwen/Qwen3-VL-4B-Instruct\" DEVICE = \"cuda\" DTYPE = torc\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43882",
-    "created_at": "2026-02-10T08:38:51Z",
-    "deletions": 3,
+    "comments_count": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44158",
+    "created_at": "2026-02-19T15:45:13Z",
+    "deletions": 36,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43882/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43882",
+    "files_url": "https://github.com/huggingface/transformers/pull/44158/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44158",
     "labels": [],
     "merged": true,
-    "number": 43882,
-    "review_comments_count": 2,
+    "number": 44158,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`fix`] Use `last_hidden_state` key from `get_image_features` for llama4",
-    "updated_at": "2026-02-10T15:05:50Z"
+    "title": "fix bug with position_ids on qwen3-vl models, such that position_ids include text position",
+    "updated_at": "2026-02-23T14:53:33Z"
   },
   {
-    "additions": 14,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "After the \"Refactor weight loading\" commit (#41580), UMT5EncoderModel fails to properly tie encoder.embed_tokens.weight to shared.weight when loading checkpoints with tie_word_embeddings=False (e.g., Wan-AI video generation models). This c\u2026",
-    "changed_files": 4,
+    "additions": 689,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per title, gets rid of `if/else` per attn implementation",
+    "changed_files": 24,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43880",
-    "created_at": "2026-02-10T07:26:07Z",
-    "deletions": 2,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44157",
+    "created_at": "2026-02-19T14:49:49Z",
+    "deletions": 834,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43880/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43880",
+    "files_url": "https://github.com/huggingface/transformers/pull/44157/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44157",
     "labels": [],
-    "merged": true,
-    "number": 43880,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix UMT5EncoderModel embedding weights not being tied after loading",
-    "updated_at": "2026-02-19T12:42:24Z"
+    "merged": false,
+    "number": 44157,
+    "review_comments_count": 10,
+    "state": "open",
+    "title": "Use correct mask for packed inputs in Qwen-VL ",
+    "updated_at": "2026-02-24T13:13:43Z"
   },
   {
-    "additions": 17,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 17,
+    "additions": 2,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR fixes a minor error when using aqml quantization. We specified the wrong argument.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43879",
-    "created_at": "2026-02-10T06:42:49Z",
-    "deletions": 17,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44156",
+    "created_at": "2026-02-19T14:35:38Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43879/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43879",
+    "files_url": "https://github.com/huggingface/transformers/pull/44156/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44156",
     "labels": [],
-    "merged": true,
-    "number": 43879,
+    "merged": false,
+    "number": 44156,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix old tech stack in doc",
-    "updated_at": "2026-02-10T13:58:11Z"
+    "title": "Fix aqml `modules_to_not_convert`",
+    "updated_at": "2026-03-27T16:50:02Z"
   },
   {
-    "additions": 39,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #43874 by adding `get_number_of_image_patches` to `Glm46VImageProcessorFast`. `Glm46VProcessor._get_num_multimodal_tokens` calls this method on `self.image_processor`, which raises an `AttributeError` when the fast image p\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43877",
-    "created_at": "2026-02-10T04:50:19Z",
-    "deletions": 10,
+    "additions": 44,
+    "author": "Aatman09",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44154",
+    "created_at": "2026-02-19T12:17:56Z",
+    "deletions": 52,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43877/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43877",
+    "files_url": "https://github.com/huggingface/transformers/pull/44154/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44154",
     "labels": [],
-    "merged": true,
-    "number": 43877,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Fix missing fast image patch counter in Glm46V",
-    "updated_at": "2026-02-11T12:23:27Z"
+    "merged": false,
+    "number": 44154,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactored vits to match standardized output collection interface",
+    "updated_at": "2026-02-19T12:18:56Z"
   },
   {
-    "additions": 17,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary This PR fixes #43864 by preserving the `GlmMoeDsaConfig` default `mlp_layer_types` from the modular source. `GlmMoeDsaConfig` should default to dense MLP for the first 3 layers and sparse afterward. During modular conversion, th\u2026",
+    "additions": 79,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43876",
-    "created_at": "2026-02-10T04:28:23Z",
-    "deletions": 95,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44152",
+    "created_at": "2026-02-19T09:37:51Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43876/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43876",
+    "files_url": "https://github.com/huggingface/transformers/pull/44152/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44152",
     "labels": [],
     "merged": true,
-    "number": 43876,
+    "number": 44152,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix GlmMoeDsaConfig default mlp_layer_types in modular conversion",
-    "updated_at": "2026-02-10T12:24:19Z"
+    "title": "AutoGrad support for grouped_mm fallback",
+    "updated_at": "2026-02-20T11:15:23Z"
   },
   {
-    "additions": 31,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Its reset should clear ```_quantized_keys``` and ```_quantized_values ```.",
+    "additions": 58,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the BioGPT m\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43875",
-    "created_at": "2026-02-10T02:56:22Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44151",
+    "created_at": "2026-02-19T06:55:43Z",
+    "deletions": 134,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43875/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43875",
+    "files_url": "https://github.com/huggingface/transformers/pull/44151/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44151",
     "labels": [],
     "merged": false,
-    "number": 43875,
+    "number": 44151,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Improve handling of QuantizedLayer.reset",
-    "updated_at": "2026-02-11T00:46:54Z"
+    "state": "closed",
+    "title": "Refactor BioGPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:01Z"
   },
   {
-    "additions": 6,
-    "author": "Tanmaygangurde20",
+    "additions": 22,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes #43867 This PR fixes a `TypeError` in the `dot_natural_key` function in `src/transformers/core_model_loading.py` that occurs when sorting model state dictionary keys with mixed numeric and string structures. #\u2026",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the MPT mode\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43871",
-    "created_at": "2026-02-09T20:19:43Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43871/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43871",
-    "labels": [
-      "Code agent slop"
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
     ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44150",
+    "created_at": "2026-02-19T06:54:09Z",
+    "deletions": 73,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44150/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44150",
+    "labels": [],
     "merged": false,
-    "number": 43871,
+    "number": 44150,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix TypeError in dot_natural_key when sorting mixed structure keys",
-    "updated_at": "2026-02-10T12:45:05Z"
+    "title": "Refactor MPT output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:02Z"
   },
   {
-    "additions": 90,
-    "author": "daniel7an",
+    "additions": 85,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Adds `interpolate_pos_encoding` support to the **VitPoseBackbone** model, enabling pretrained checkpoints to be used on input images of different resolutions. This follows the same pattern established in other visi\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CPMAnt m\u2026",
+    "changed_files": 4,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43870",
-    "created_at": "2026-02-09T19:20:34Z",
-    "deletions": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44149",
+    "created_at": "2026-02-19T06:51:06Z",
+    "deletions": 201,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43870/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43870",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44149/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44149",
+    "labels": [],
     "merged": false,
-    "number": 43870,
+    "number": 44149,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add interpolate_pos_encoding to VitPoseBackbone",
-    "updated_at": "2026-02-10T12:48:09Z"
+    "title": "Refactor CPMAnt output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:03Z"
   },
   {
-    "additions": 8,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "There were some leftover references to the TranslationPipeline, so this PR removes them. Includes the cleanup from #43826 as well as some other changes!",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43869",
-    "created_at": "2026-02-09T18:35:27Z",
-    "deletions": 99,
+    "additions": 33,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the Bros mod\u2026",
+    "changed_files": 4,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44148",
+    "created_at": "2026-02-19T06:46:24Z",
+    "deletions": 124,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43869/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43869",
+    "files_url": "https://github.com/huggingface/transformers/pull/44148/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44148",
     "labels": [],
-    "merged": true,
-    "number": 43869,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 44148,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove remaining vestiges of the TranslationPipeline",
-    "updated_at": "2026-02-26T14:38:05Z"
+    "title": "Refactor Bros output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:05Z"
   },
   {
-    "additions": 1,
-    "author": "thecaptain789",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes a simple typo in the timm backbones documentation. 'neccessary' \u2192 'necessary'",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 11,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "Part of the ongoing effort to migrate models to standardized output collection (ref #43979). Replaces manual output_hidden_states/output_attentions/return_dict handling with @capture_outputs and @can_return_tuple decorators in the CTRL mod\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43868",
-    "created_at": "2026-02-09T17:58:02Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44147",
+    "created_at": "2026-02-19T06:45:32Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43868/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43868",
+    "files_url": "https://github.com/huggingface/transformers/pull/44147/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44147",
     "labels": [],
-    "merged": true,
-    "number": 43868,
+    "merged": false,
+    "number": 44147,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: correct typo 'neccessary' to 'necessary'",
-    "updated_at": "2026-02-09T18:18:58Z"
+    "title": "Refactor CTRL output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:06Z"
   },
   {
-    "additions": 4,
-    "author": "Deep-unlearning",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "additions": 38,
+    "author": "khushali9",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? When using a step-based evaluation strategy (IntervalStrategy.STEPS), the trainer may skip evaluation at the final step if the last step does not align with eval_steps. This avoids missing the final evaluation while\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43865",
-    "created_at": "2026-02-09T15:12:33Z",
-    "deletions": 4,
+    "comments_count": 18,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44146",
+    "created_at": "2026-02-19T05:29:21Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43865/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43865",
+    "files_url": "https://github.com/huggingface/transformers/pull/44146/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44146",
     "labels": [],
     "merged": true,
-    "number": 43865,
-    "review_comments_count": 17,
+    "number": 44146,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "update somes audio models",
-    "updated_at": "2026-02-13T10:25:47Z"
+    "title": "Ensure final evaluation runs with step-based evaluation strategy",
+    "updated_at": "2026-03-26T16:30:40Z"
   },
   {
-    "additions": 5,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This allows to do something like: ```python from transformers import AutoProcessor from datasets import Audio, load_dataset model_id = \"openai/whisper-large-v3\" processor = AutoProcessor.from_pretrained(model_id) da\u2026",
-    "changed_files": 1,
+    "additions": 400,
+    "author": "balvisio",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for sequence packing in the ESM2 model. Currently, the RotaryEmbedding class of the ESM2 model supports BSHD format. This PR makes the RotayEmbedding class aware of the`position_ids` and builds\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43863",
-    "created_at": "2026-02-09T14:18:04Z",
-    "deletions": 16,
+    "comments_count": 23,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44145",
+    "created_at": "2026-02-19T02:58:50Z",
+    "deletions": 216,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43863/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43863",
+    "files_url": "https://github.com/huggingface/transformers/pull/44145/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44145",
     "labels": [],
     "merged": false,
-    "number": 43863,
-    "review_comments_count": 0,
+    "number": 44145,
+    "review_comments_count": 9,
     "state": "open",
-    "title": "[whisper] allow to pass text/audio specific kwargs",
-    "updated_at": "2026-02-10T11:28:03Z"
+    "title": "Add THD support in ESM",
+    "updated_at": "2026-04-02T12:01:52Z"
   },
   {
-    "additions": 1642,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. `check_model_inputs` is becoming very complex and doing more than what it should. Let's separate into 2 clear decorators: - `capture_outputs`: everything related to capture outputs - `merge_with_co\u2026",
-    "changed_files": 261,
+    "additions": 1481,
+    "author": "TinderZ",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds 5 Chinese translations for common NLP task tutorials that were missing from the `docs/source/zh/tasks/` directory. The following files are added: - `tasks/sequence_classification.md` - \u6587\u672c\u5206\u7c7b - `tasks/tok\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43862",
-    "created_at": "2026-02-09T13:37:05Z",
-    "deletions": 996,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44144",
+    "created_at": "2026-02-19T02:35:08Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43862/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43862",
+    "files_url": "https://github.com/huggingface/transformers/pull/44144/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44144",
     "labels": [],
     "merged": true,
-    "number": 43862,
-    "review_comments_count": 4,
+    "number": 44144,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Separate `check_model_inputs` into `capture_outputs` and `merge_with_config_defaults` + ensure correctness",
-    "updated_at": "2026-02-10T17:37:46Z"
+    "title": "[docs] Add Chinese translations for common NLP task tutorials",
+    "updated_at": "2026-02-20T16:50:29Z"
   },
   {
     "additions": 2,
-    "author": "thecaptain789",
+    "author": "nightcityblade",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43813 Corrects the typo in `src/transformers/integrations/peft.py` on lines 264 and 303 where `quantizatin_operations` should be `quantization_operations`.",
+    "body_excerpt": "## What does this PR do? Fixes minor typos in the `GenerationConfig` class docstring: - \"overriden\" \u2192 \"overridden\" - \"field that are\" \u2192 \"fields that are\" - \"Arg:\" \u2192 \"Args:\" (consistent with the rest of the docstring) No code changes, docum\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43861",
-    "created_at": "2026-02-09T11:50:51Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44143",
+    "created_at": "2026-02-18T23:07:23Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43861/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43861",
-    "labels": [],
-    "merged": false,
-    "number": 43861,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix: correct typo 'quantizatin_operations' to 'quantization_operations'",
-    "updated_at": "2026-02-25T13:25:05Z"
-  },
-  {
-    "additions": 1,
-    "author": "pavel-esir",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? When tokenizer is red from `tokenizer.model` then model type whether it's `Unigram` or `BPE` is done incorrectly. Correct Unigram model type is 1 not 2 according to `SentencePiece` src https://github.com/google/sent\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43860",
-    "created_at": "2026-02-09T11:45:22Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43860/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43860",
+    "files_url": "https://github.com/huggingface/transformers/pull/44143/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44143",
     "labels": [],
     "merged": true,
-    "number": 43860,
+    "number": 44143,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update ModelType for Unigram tokenizer",
-    "updated_at": "2026-02-09T15:13:45Z"
+    "title": "[docs] Fix typos in GenerationConfig docstring",
+    "updated_at": "2026-02-19T13:24:09Z"
   },
   {
-    "additions": 1719,
-    "author": "Cyrilvallez",
+    "additions": 72,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title!",
-    "changed_files": 12,
+    "body_excerpt": "# What does this PR do? so @Deep-unlearning noticed, benchmarking for the Open ASR leaderbaord, that the current implem is particularly slow. That would make sense since we go through every layer of the encoder forward, and that the stream\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43858",
-    "created_at": "2026-02-09T10:28:19Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44142",
+    "created_at": "2026-02-18T21:44:11Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43858/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43858",
-    "labels": [
-      "New model"
-    ],
-    "merged": true,
-    "number": 43858,
+    "files_url": "https://github.com/huggingface/transformers/pull/44142/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44142",
+    "labels": [],
+    "merged": false,
+    "number": 44142,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add GlmMoeDsa",
-    "updated_at": "2026-02-21T10:38:46Z"
+    "state": "open",
+    "title": "[voxtral-realtime] get more perfs!",
+    "updated_at": "2026-02-23T17:25:45Z"
   },
   {
-    "additions": 348,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Should just be a way to skip remote code if we want to",
-    "changed_files": 4,
+    "additions": 42,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44102 (original account: @fumadari). ## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43857",
-    "created_at": "2026-02-09T10:13:39Z",
-    "deletions": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44141",
+    "created_at": "2026-02-18T21:14:53Z",
+    "deletions": 154,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43857/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43857",
+    "files_url": "https://github.com/huggingface/transformers/pull/44141/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44141",
     "labels": [],
     "merged": false,
-    "number": 43857,
+    "number": 44141,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Allow to bypass remote code if we want to try and convert it",
-    "updated_at": "2026-02-26T15:19:02Z"
+    "state": "closed",
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-22T02:28:47Z"
   },
   {
-    "additions": 0,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "When all routing weights are zero, in this line [out_per_sample = out_per_sample * sample_weights.unsqueeze(-1)](https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/moe.py#L153) , it will also output all zer\u2026",
+    "additions": 66,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44104 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43855",
-    "created_at": "2026-02-09T08:40:45Z",
-    "deletions": 4,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44140",
+    "created_at": "2026-02-18T21:14:50Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43855/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43855",
+    "files_url": "https://github.com/huggingface/transformers/pull/44140/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44140",
     "labels": [],
-    "merged": true,
-    "number": 43855,
+    "merged": false,
+    "number": 44140,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "delete unnecessary code to make moe compatible to full graph compile",
-    "updated_at": "2026-02-09T12:16:03Z"
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
   },
   {
-    "additions": 1,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- quantization: @SunMarc @MekkCyber ``` [rank0]: During handling of the above exception, another exception occurred: [rank0]: Traceback (most recent call last): [rank0]: File \"/mnt/disk3/wangyi/transformers/test_tp.py\", line 24, in <module\u2026",
+    "additions": 39,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44105 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
     "changed_files": 1,
     "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43853",
-    "created_at": "2026-02-09T06:39:17Z",
-    "deletions": 1,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44139",
+    "created_at": "2026-02-18T21:14:46Z",
+    "deletions": 127,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43853/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43853",
+    "files_url": "https://github.com/huggingface/transformers/pull/44139/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44139",
     "labels": [],
-    "merged": true,
-    "number": 43853,
+    "merged": false,
+    "number": 44139,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix gptoss crash in tp",
-    "updated_at": "2026-02-09T10:42:33Z"
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:48Z"
   },
   {
-    "additions": 41,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Currently, if slack reporting (re-usable) workflow or `check new failure` workflow fails, the CI triggered via a comment in a pull request will send a comment back to the PR page with \u2705 No failing test specific to t\u2026",
-    "changed_files": 4,
+    "additions": 51,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44106 (original account: @fumadari). ## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hook\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 15,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43852",
-    "created_at": "2026-02-09T05:43:44Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44138",
+    "created_at": "2026-02-18T21:14:42Z",
+    "deletions": 132,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43852/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43852",
+    "files_url": "https://github.com/huggingface/transformers/pull/44138/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44138",
     "labels": [],
-    "merged": true,
-    "number": 43852,
+    "merged": false,
+    "number": 44138,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "don't merge check",
-    "updated_at": "2026-02-10T09:45:42Z"
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-22T02:28:49Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
+    "additions": 43,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44107 (original account: @fumadari). ## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43851",
-    "created_at": "2026-02-09T03:29:35Z",
-    "deletions": 0,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44137",
+    "created_at": "2026-02-18T21:14:39Z",
+    "deletions": 113,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43851/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43851",
+    "files_url": "https://github.com/huggingface/transformers/pull/44137/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44137",
     "labels": [],
-    "merged": true,
-    "number": 43851,
+    "merged": false,
+    "number": 44137,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix slack-report workflow file",
-    "updated_at": "2026-02-09T05:41:32Z"
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
   },
   {
-    "additions": 2,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - remove `pipeline()`-based inference examples from the summarization and translation task docs - keep only direct `AutoTokenizer` + `AutoModelForSeq2SeqLM.generate` examples, which match v5 behavior ## Validation - `grep -R --l\u2026",
+    "additions": 37,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44108 (original account: @fumadari). ## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43850",
-    "created_at": "2026-02-09T02:34:44Z",
-    "deletions": 25,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44136",
+    "created_at": "2026-02-18T21:14:35Z",
+    "deletions": 86,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43850/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43850",
+    "files_url": "https://github.com/huggingface/transformers/pull/44136/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44136",
     "labels": [],
     "merged": false,
-    "number": 43850,
+    "number": 44136,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove stale pipeline inference examples from v5 task docs",
-    "updated_at": "2026-02-09T12:24:03Z"
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-22T02:28:50Z"
   },
   {
-    "additions": 42,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - only treat `translation_XX_to_YY` as a special task when the `translation` pipeline is actually registered - stop advertising `translation_XX_to_YY` in unknown-task error messages when translation is not supported - add regres\u2026",
+    "additions": 48,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44109 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `H\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43849",
-    "created_at": "2026-02-09T02:31:22Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44135",
+    "created_at": "2026-02-18T21:14:31Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43849/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43849",
+    "files_url": "https://github.com/huggingface/transformers/pull/44135/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44135",
     "labels": [],
     "merged": false,
-    "number": 43849,
+    "number": 44135,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix translation task validation when translation pipeline is unavailable",
-    "updated_at": "2026-03-03T09:42:38Z"
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
   },
   {
-    "additions": 70,
-    "author": "weiguangli-io",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #43698 by adding SwanLab resume init argument support in the Trainer integration. ## What changed - `SwanLabCallback.setup()` now forwards two optional env-based init args to `swanlab.init(...)`: - `SWANLAB_RUN_ID` -> `id`\u2026",
+    "additions": 28,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44110 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43848",
-    "created_at": "2026-02-09T02:10:15Z",
-    "deletions": 1,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44134",
+    "created_at": "2026-02-18T21:14:27Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43848/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43848",
+    "files_url": "https://github.com/huggingface/transformers/pull/44134/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44134",
     "labels": [],
-    "merged": true,
-    "number": 43848,
+    "merged": false,
+    "number": 44134,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix SwanLab callback to forward resume init args",
-    "updated_at": "2026-02-10T12:57:08Z"
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:51Z"
   },
   {
-    "additions": 39,
-    "author": "tohtana",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? When using `HfDeepSpeedConfig` + ZeRO-3 with `from_config()`, the model's custom weight initialization (`_init_weights`) is silently skipped because parameters are already partitioned by `deepspeed.zero.Init`. The m\u2026",
+    "additions": 30,
+    "author": "dario-fumarola",
+    "author_association": "NONE",
+    "body_excerpt": "Migrated from https://github.com/huggingface/transformers/pull/44111 (original account: @fumadari). ## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` dec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44133",
+    "created_at": "2026-02-18T21:12:22Z",
+    "deletions": 59,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44133/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44133",
+    "labels": [],
+    "merged": false,
+    "number": 44133,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-22T02:28:52Z"
+  },
+  {
+    "additions": 13,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? #43674 broke voxtral processor",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43847",
-    "created_at": "2026-02-08T22:04:44Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44132",
+    "created_at": "2026-02-18T20:13:15Z",
+    "deletions": 34,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43847/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43847",
+    "files_url": "https://github.com/huggingface/transformers/pull/44132/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44132",
     "labels": [],
-    "merged": false,
-    "number": 43847,
-    "review_comments_count": 2,
+    "merged": true,
+    "number": 44132,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": " Fix _from_config silently skipping weight initialization under DeepSpeed ZeRO-3",
-    "updated_at": "2026-02-27T18:50:02Z"
+    "title": "[voxtral] fix voxtral proc",
+    "updated_at": "2026-02-19T16:41:53Z"
   },
   {
-    "additions": 108,
-    "author": "vasanthrpjan1-boop",
+    "additions": 2,
+    "author": "cluster2600",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Add support for logging training metrics to the Pluto experiment tracker (https://pluto.trainy.ai/) as a native Trainer callback. This provides an alternative for users migrating from Neptune given its upcoming shutdown. Closes #43724 # Wh\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## What Two small corrections in `docs/source/en/quantization/overview.md`: 1. **Typo fix**: `AuoQuant Notebook` \u2192 `AutoQuant Notebook` in the *User-Friendly Quantization Tools* section. The letter `t` was missing from the link text. 2. **\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43843",
-    "created_at": "2026-02-08T14:29:51Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44131",
+    "created_at": "2026-02-18T19:25:52Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43843/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43843",
+    "files_url": "https://github.com/huggingface/transformers/pull/44131/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44131",
     "labels": [],
-    "merged": false,
-    "number": 43843,
+    "merged": true,
+    "number": 44131,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add PlutoCallback integration for Pluto experiment tracker",
-    "updated_at": "2026-02-08T14:30:36Z"
+    "title": "docs: fix typo 'AuoQuant' \u2192 'AutoQuant' and clarify FINEGRAINED_FP8 library column",
+    "updated_at": "2026-02-18T20:49:47Z"
   },
   {
-    "additions": 3,
-    "author": "Mr-Neutr0n",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `NameError: name 'TypeAdapter' is not defined` error when importing transformers without pydantic installed. ## Problem The `TypeAdapter` class from pydantic was used as a type annotation in `_validate_requ\u2026",
-    "changed_files": 1,
+    "additions": 302,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. This PR is the first big step towards removing the `cache_position` everywhere, as they are not needed in general and everything can be inferred from the cache itself. The major changes are the fol\u2026",
+    "changed_files": 23,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43842",
-    "created_at": "2026-02-08T13:01:19Z",
-    "deletions": 1,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44130",
+    "created_at": "2026-02-18T11:58:54Z",
+    "deletions": 886,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43842/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43842",
+    "files_url": "https://github.com/huggingface/transformers/pull/44130/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44130",
     "labels": [],
-    "merged": false,
-    "number": 43842,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix(cli): Fix TypeAdapter NameError when pydantic is not installed",
-    "updated_at": "2026-02-08T16:08:04Z"
+    "merged": true,
+    "number": 44130,
+    "review_comments_count": 16,
+    "state": "closed",
+    "title": "[generate] Completely stop relying on `cache_position` to prepare inputs",
+    "updated_at": "2026-02-20T18:46:19Z"
   },
   {
-    "additions": 7,
-    "author": "Mr-Neutr0n",
+    "additions": 76,
+    "author": "preetam1407",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a bug where `SwitchTransformersConfig` incorrectly creates sparse layers when `num_sparse_encoder_layers=0` or `num_sparse_decoder_layers=0` is set with a single-layer model. ## Problem When `num_sparse_encod\u2026",
+    "body_excerpt": "# What does this PR do? Refactors SpeechT5 to the standardized output tracing interface. - Adds `@capture_outputs` to base encoder/decoder forwards. - Adds `_can_record_outputs` mappings for hidden states and attentions. - Adds `@can_retur\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43841",
-    "created_at": "2026-02-08T12:58:27Z",
-    "deletions": 3,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44129",
+    "created_at": "2026-02-18T11:24:13Z",
+    "deletions": 222,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43841/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43841",
+    "files_url": "https://github.com/huggingface/transformers/pull/44129/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44129",
     "labels": [],
     "merged": false,
-    "number": 43841,
+    "number": 44129,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(switch_transformers): Fix sparse layer creation when num_sparse_*_layers=0",
-    "updated_at": "2026-02-09T13:00:01Z"
+    "state": "open",
+    "title": "Refactor SpeechT5 output tracing to standardized output capture",
+    "updated_at": "2026-02-18T11:25:19Z"
   },
   {
-    "additions": 2,
-    "author": "Mr-Neutr0n",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes incorrect timestamp calculation in Qwen3VL Processor by using `temporal_patch_size` instead of `merge_size`. ## Problem The `_calculate_timestamps()` method was being called with `self.video_processor.merge_s\u2026",
+    "additions": 59,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Just makes sure we trigger dev version update",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43840",
-    "created_at": "2026-02-08T12:55:28Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44128",
+    "created_at": "2026-02-18T10:42:21Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43840/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43840",
+    "files_url": "https://github.com/huggingface/transformers/pull/44128/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44128",
     "labels": [],
     "merged": false,
-    "number": 43840,
-    "review_comments_count": 0,
+    "number": 44128,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "fix(qwen3_vl): Use temporal_patch_size for timestamp calculation",
-    "updated_at": "2026-02-09T08:56:08Z"
+    "title": "update release workflow",
+    "updated_at": "2026-03-30T13:40:19Z"
   },
   {
-    "additions": 9,
-    "author": "Mr-Neutr0n",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a `RuntimeError: expected mat1 and mat2 to have the same dtype` error when using `torch.autocast` with MoE models like `microsoft/Phi-tiny-MoE-instruct`. ## Problem `torch._grouped_mm` is not autocast-enabled\u2026",
-    "changed_files": 1,
+    "additions": 3,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": "when the model_type isn't in `TOKENIZER_MAPPING_NAMES` (ex. \"llama\"), `TOKENIZER_MAPPING_NAMES.get(\"llama\", \"\")` --> \"\". Then we compare `\"\" != \"LlamaTokenizer\"` (the `tokenizer_class` in `tokenizer_config.json`). Since that's true we earl\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43839",
-    "created_at": "2026-02-08T12:21:19Z",
-    "deletions": 6,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44127",
+    "created_at": "2026-02-18T10:41:48Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43839/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43839",
+    "files_url": "https://github.com/huggingface/transformers/pull/44127/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44127",
     "labels": [],
     "merged": true,
-    "number": 43839,
+    "number": 44127,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(moe): Handle dtype mismatch in torch._grouped_mm with autocast",
-    "updated_at": "2026-02-11T14:58:48Z"
-  },
-  {
-    "additions": 2908,
-    "author": "mbtariq82",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds Qwen3-ASR to the Transformers library. Fixes #43837 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [co\u2026",
-    "changed_files": 15,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 17,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43838",
-    "created_at": "2026-02-08T12:05:43Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43838/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43838",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": false,
-    "number": 43838,
-    "review_comments_count": 27,
-    "state": "open",
-    "title": "Proposal to add Qwen3-ASR support [WIP]",
-    "updated_at": "2026-03-20T17:14:42Z"
+    "title": "AutoTokenizer ignores config when model_type is None",
+    "updated_at": "2026-02-18T14:47:52Z"
   },
   {
-    "additions": 79,
-    "author": "pragnyanramtha",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43824 what i think happened in #43824 is that waltwalt36 did not install the optional dependencies like pydantic, causing this issue. According to the core architecture docs, transformers implements a lazy loading mechanism for impo\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. Let's simplify after https://github.com/huggingface/transformers/pull/42848",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43836",
-    "created_at": "2026-02-08T11:28:31Z",
-    "deletions": 70,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44126",
+    "created_at": "2026-02-18T09:58:49Z",
+    "deletions": 40,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43836/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43836",
+    "files_url": "https://github.com/huggingface/transformers/pull/44126/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44126",
     "labels": [],
-    "merged": false,
-    "number": 43836,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "fix: wrapped TypeAdpater in string literals (for now)",
-    "updated_at": "2026-02-17T04:46:27Z"
+    "merged": true,
+    "number": 44126,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Simplify input preparation in generate",
+    "updated_at": "2026-02-18T10:30:48Z"
   },
   {
-    "additions": 5,
-    "author": "nulone",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43828 ## What does this PR do? `torch._grouped_mm` is not registered for autocast. Under `torch.autocast`, LayerNorm outputs float32 while model weights stay bfloat16, causing RuntimeError: \"expected mat1 and mat2 to have same dtype\u2026",
+    "additions": 8,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43986",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43833",
-    "created_at": "2026-02-08T07:26:06Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44125",
+    "created_at": "2026-02-18T09:34:54Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43833/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43833",
+    "files_url": "https://github.com/huggingface/transformers/pull/44125/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44125",
     "labels": [],
-    "merged": false,
-    "number": 43833,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: ensure dtype consistency in grouped_mm under autocast",
-    "updated_at": "2026-02-11T02:28:43Z"
+    "merged": true,
+    "number": 44125,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Raise informative error when loading video processors",
+    "updated_at": "2026-02-20T08:23:35Z"
   },
   {
-    "additions": 0,
-    "author": "nulone",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Fixes #43827 ## What does this PR do? Removes deprecated `pipeline()` examples from summarization.md and translation.md that reference pre-v5 API. The manual `model.generate()` approach is preserved. ## Before submitting - [x] This PR fixe\u2026",
+    "additions": 10,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to force evaluation at the end of training, even if the last step doesn't align with eval_steps. Changes: training_args.py: Added eval_on_end field. trainer.py: Added logic to call evaluat\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43832",
-    "created_at": "2026-02-08T07:06:47Z",
-    "deletions": 27,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44124",
+    "created_at": "2026-02-18T08:52:23Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43832/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43832",
+    "files_url": "https://github.com/huggingface/transformers/pull/44124/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44124",
     "labels": [],
     "merged": false,
-    "number": 43832,
+    "number": 44124,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "docs: remove deprecated pipeline examples from summarization and tran\u2026",
-    "updated_at": "2026-02-08T07:19:52Z"
+    "title": "feat: add eval_on_end to Trainer for final evaluation",
+    "updated_at": "2026-02-18T14:14:16Z"
   },
   {
-    "additions": 0,
-    "author": "Mr-Neutr0n",
+    "additions": 33,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary - Removes `pipeline()`-based inference examples from summarization and translation task documentation - These examples no longer work in v5 since `SummarizationPipeline` and `TranslationPipeline` were removed ## Background Accor\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR avoids device sync in training loss accumulation by ```torch.where```. The `is_torch_xla_available` condition is also removed.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43831",
-    "created_at": "2026-02-08T06:39:23Z",
-    "deletions": 27,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44123",
+    "created_at": "2026-02-18T08:22:57Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43831/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43831",
+    "files_url": "https://github.com/huggingface/transformers/pull/44123/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44123",
     "labels": [],
-    "merged": true,
-    "number": 43831,
+    "merged": false,
+    "number": 44123,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[docs] Remove pipeline() examples from summarization/translation tasks",
-    "updated_at": "2026-02-09T12:33:04Z"
+    "state": "open",
+    "title": "Avoid device sync in training loss accumulation",
+    "updated_at": "2026-03-30T07:57:16Z"
   },
   {
-    "additions": 7792,
-    "author": "bozheng-hit",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR adds the support of codes for the upcoming Qwen3.5 series models. For information about Qwen, please visit: \ud83d\udc49https://qwen.ai Special thanks to @JJJYmmm for helping complete the code in this PR. We also appreciate the valuable feedb\u2026",
-    "changed_files": 28,
+    "additions": 158,
+    "author": "adityuhkapoor",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Adds 4-bit embedding quantization for BitsAndBytes, mirroring TorchAO's existing `include_input_output_embeddings` and `untie_embedding_weights` pattern (PRs #37802, #37905, #37935). Large-vocabulary models (Llama 3\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43830",
-    "created_at": "2026-02-08T05:51:57Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44122",
+    "created_at": "2026-02-18T06:35:09Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43830/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43830",
+    "files_url": "https://github.com/huggingface/transformers/pull/44122/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44122",
     "labels": [
-      "New model"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 43830,
+    "merged": false,
+    "number": 44122,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Adding Support for Qwen3.5",
-    "updated_at": "2026-03-03T02:26:31Z"
+    "title": "Add BnB 4-bit embedding quantization support",
+    "updated_at": "2026-02-18T14:27:25Z"
   },
   {
-    "additions": 30,
-    "author": "jayzuccarelli",
+    "additions": 14,
+    "author": "tirth8205",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #43805 Follow-up to #43794: add a pytest fixture that sets a fixed seed (42) before each test so we always get the same RNG state in model tests and improve determinism. - **`tests/conftest.py`** (new): `set_seed` fixture with `autou\u2026",
+    "body_excerpt": "Fixes #34920 After applying `normalize()`, images can have negative values. Calling `resize()` on such images fails because it internally converts to PIL, which requires values in [0, 1] or [0, 255]. ### Fix When the image has values outsi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43829",
-    "created_at": "2026-02-08T05:10:32Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44120",
+    "created_at": "2026-02-17T23:56:48Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43829/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43829",
+    "files_url": "https://github.com/huggingface/transformers/pull/44120/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44120",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43829,
+    "number": 44120,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "chore(tests): add set_seed pytest fixture for determinism",
-    "updated_at": "2026-02-10T01:55:12Z"
+    "title": "fix: allow image_transforms.resize to handle negative values after normalization",
+    "updated_at": "2026-02-18T14:08:54Z"
   },
   {
-    "additions": 2,
-    "author": "math-hiyoko",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 1,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44117 `TOKENIZER_MAPPING_NAMES.get(config_model_type, \"\")` returns `None` when the key exists with value `None`, causing `AttributeError: 'NoneType' object has no attribute 'replace'` when loading models like `google/siglip2-so400m-\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43826",
-    "created_at": "2026-02-08T01:49:06Z",
-    "deletions": 2,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44119",
+    "created_at": "2026-02-17T23:53:20Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43826/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43826",
+    "files_url": "https://github.com/huggingface/transformers/pull/44119/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44119",
     "labels": [],
     "merged": false,
-    "number": 43826,
+    "number": 44119,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: error message of pipeline",
-    "updated_at": "2026-02-09T13:26:30Z"
+    "state": "closed",
+    "title": "fix: handle None value from TOKENIZER_MAPPING_NAMES.get() in AutoTokenizer",
+    "updated_at": "2026-02-18T14:04:47Z"
   },
   {
-    "additions": 2045,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 4,
+    "additions": 32,
+    "author": "tirth8205",
+    "author_association": "NONE",
+    "body_excerpt": "## Fix Fixes #44079 When a `ModelOutput` dataclass field is initialized as `None`, it is correctly excluded from the OrderedDict keys. However, **subsequently setting that field to a non-None value** via attribute assignment (e.g. `outputs\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43823",
-    "created_at": "2026-02-07T20:24:42Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44118",
+    "created_at": "2026-02-17T23:31:31Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43823/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43823",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44118/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44118",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43823,
+    "number": 44118,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add `facebook/MobileLLM-125M`",
-    "updated_at": "2026-02-09T11:48:23Z"
+    "state": "closed",
+    "title": "fix: ModelOutput keys not updated when setting previously-None dataclass fields",
+    "updated_at": "2026-02-18T14:18:12Z"
   },
   {
-    "additions": 15,
-    "author": "veeceey",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #43784 When PyTorch < 2.4 is installed, transformers v5.x disables PyTorch by making `is_torch_available()` return `False`. This causes the conditional import of `torch.nn as nn` (line 42) to be skipped. Howe\u2026",
+    "additions": 27,
+    "author": "dtiourine",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate Flaubert to the @capture_outputs and @can_return_tuple decorator pattern for output handling, as part of #43979. # What does this PR do? - Add `_can_record_outputs = {\"attentions\": MultiHeadAttention}` on `FlaubertPreTrainedModel`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43822",
-    "created_at": "2026-02-07T19:20:43Z",
-    "deletions": 13,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44116",
+    "created_at": "2026-02-17T21:52:13Z",
+    "deletions": 102,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43822/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43822",
+    "files_url": "https://github.com/huggingface/transformers/pull/44116/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44116",
     "labels": [],
     "merged": false,
-    "number": 43822,
+    "number": 44116,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix NameError: name 'nn' is not defined when PyTorch < 2.4",
-    "updated_at": "2026-02-09T12:53:52Z"
+    "state": "open",
+    "title": "[WIP] [Flaubert] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-17T21:53:23Z"
   },
   {
     "additions": 2,
-    "author": "redpanda1995",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": "## Summary - Fix broken `[chat template](./chat_templating)` links in `docs/source/en/tasks/` - `./chat_templating` resolves within `tasks/` (doesn't exist); corrected to `../chat_templating` - Affected files: `tasks/image_text_to_text.md`\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43821",
-    "created_at": "2026-02-07T19:04:11Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44115",
+    "created_at": "2026-02-17T21:32:55Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43821/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43821",
+    "files_url": "https://github.com/huggingface/transformers/pull/44115/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44115",
     "labels": [],
     "merged": true,
-    "number": 43821,
+    "number": 44115,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix typo in quantization_operations in PEFT integrations",
-    "updated_at": "2026-02-16T17:59:57Z"
+    "title": "[docs] fix broken chat_templating links in tasks docs",
+    "updated_at": "2026-02-23T16:27:57Z"
   },
   {
-    "additions": 10,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Implemented a fix that applies the straight-through estimator to each latent chunk before `out_proj`, following the straight-through estimator pattern use\u2026",
-    "changed_files": 2,
+    "additions": 716,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR migrates the `wav2vec2` family to the standardized output-capturing interface (`@capture_outputs` + `@can_return_tuple`) and includes follow-up compatibility fixes required to make full CI green. ## What changed ### Core\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 17,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43820",
-    "created_at": "2026-02-07T14:44:51Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44114",
+    "created_at": "2026-02-17T21:17:35Z",
+    "deletions": 1237,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43820/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43820",
+    "files_url": "https://github.com/huggingface/transformers/pull/44114/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44114",
     "labels": [],
-    "merged": true,
-    "number": 43820,
+    "merged": false,
+    "number": 44114,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix(models): Apply STE in Dac.from_latents to match the forward pass",
-    "updated_at": "2026-02-10T17:18:16Z"
+    "state": "open",
+    "title": "Migrate wav2vec2, wav2vec2_conformer, and wav2vec2_bert to standardized output collection decorators",
+    "updated_at": "2026-02-18T20:34:53Z"
   },
   {
-    "additions": 135,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "This is a follow-up work on trying to fix flakiness. Adding Global Deterministic Testing # Deterministic Testing Infrastructure - Summary N.B. this is for CPU-only tests ## Problem The test suite has flaky tests that failed intermittently\u2026",
-    "changed_files": 13,
+    "additions": 5,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Updates the stale `test_device_override` in `test_processing_granite_speech.py` to verify that the device param controls where speech inputs are placed, r\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43817",
-    "created_at": "2026-02-07T08:33:51Z",
-    "deletions": 57,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43817/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43817",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44113",
+    "created_at": "2026-02-17T20:01:32Z",
+    "deletions": 7,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44113/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44113",
+    "labels": [],
+    "merged": true,
+    "number": 44113,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "fix(testing): Update stale device override test in GraniteSpeech",
+    "updated_at": "2026-02-19T11:24:29Z"
+  },
+  {
+    "additions": 30,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `poolformer` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `PoolFormerLayer` to return a single tensor instead of a 1-tuple - Simplifies `\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44111",
+    "created_at": "2026-02-17T19:38:02Z",
+    "deletions": 59,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44111/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44111",
     "labels": [],
     "merged": false,
-    "number": 43817,
+    "number": 44111,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(flaky): use a fixture for `set_seed` and single-threading [WIP]",
-    "updated_at": "2026-03-19T10:29:23Z"
+    "title": "refactor(poolformer): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:22Z"
   },
   {
-    "additions": 16,
-    "author": "thecaptain789",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43698 Adds support for run resumption in the SwanLab integration via two new environment variables: - `SWANLAB_RUN_ID`: Unique identifier for the run - `SWANLAB_RESUME`: Controls resumption behavior (`must`, `allow`, `never`, `auto`\u2026",
+    "additions": 28,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `tvp` to use the `capture_outputs`, `can_return_tuple`, and `merge_with_config_defaults` decorators - Simplifies `TvpAttention` to always return `(output, attention_probs)` (hooks decide what to capt\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43816",
-    "created_at": "2026-02-07T03:55:02Z",
-    "deletions": 0,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44110",
+    "created_at": "2026-02-17T19:32:55Z",
+    "deletions": 101,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43816/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43816",
+    "files_url": "https://github.com/huggingface/transformers/pull/44110/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44110",
     "labels": [],
     "merged": false,
-    "number": 43816,
+    "number": 44110,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix: add id and resume parameters to SwanLab integration",
-    "updated_at": "2026-02-09T09:44:45Z"
+    "state": "closed",
+    "title": "refactor(tvp): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:24Z"
   },
   {
-    "additions": 1,
-    "author": "zyoraclub",
+    "additions": 48,
+    "author": "fumadari",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43815",
-    "created_at": "2026-02-07T03:24:08Z",
-    "deletions": 0,
+    "body_excerpt": "## Summary - Part of #43979 \u2014 refactors `hgnet_v2` to use the `capture_outputs` and `merge_with_config_defaults` decorators - Simplifies `HGNetV2Encoder` by removing `return_dict` parameter (always returns `BaseModelOutputWithNoAttention`)\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44109",
+    "created_at": "2026-02-17T19:23:03Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43815/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43815",
+    "files_url": "https://github.com/huggingface/transformers/pull/44109/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44109",
     "labels": [],
     "merged": false,
-    "number": 43815,
-    "review_comments_count": 1,
+    "number": 44109,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Add missing import statement for os module",
-    "updated_at": "2026-02-09T06:28:30Z"
+    "title": "refactor(hgnet_v2): use capture_outputs for output tracing",
+    "updated_at": "2026-02-18T21:19:25Z"
   },
   {
-    "additions": 0,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This EncoderDecoderCache.batch_split is a remaining method from previous refactoring and is not used by other code.",
+    "additions": 33,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Adds `@merge_with_config_defaults` and `@capture_outputs` to both `VitDetModel` and `VitDetBackbone`, removing manual `output_attentions`/`return_dict` resolution - Adds `_can_record_outputs = {\"attentions\": VitDetAttention}`\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43814",
-    "created_at": "2026-02-07T03:04:26Z",
-    "deletions": 14,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44108",
+    "created_at": "2026-02-17T19:15:00Z",
+    "deletions": 82,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43814/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43814",
+    "files_url": "https://github.com/huggingface/transformers/pull/44108/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44108",
     "labels": [],
-    "merged": true,
-    "number": 43814,
+    "merged": false,
+    "number": 44108,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Delete batch_split from EncoderDecoderCache",
-    "updated_at": "2026-02-09T13:14:21Z"
+    "title": "refactor(vitdet): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:27Z"
   },
   {
-    "additions": 5,
-    "author": "tobyliu2004",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43784 Fixes `NameError: name 'nn' is not defined` when importing transformers with PyTorch < 2.4. ## The Issue When PyTorch < 2.4 is detected, transformers disables PyTorch by making `is_torch_available()` re\u2026",
+    "additions": 40,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replaces manual `output_hidden_states`/`return_dict` resolution in `MraModel` with `@merge_with_config_defaults` and `@capture_outputs` decorators - Simplifies `MraEncoder` to a plain loop returning a single tensor, removing `\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43812",
-    "created_at": "2026-02-06T19:53:01Z",
-    "deletions": 1,
+    "cluster_id": "cluster-44107-10",
+    "cluster_ids": [
+      "cluster-44107-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44107",
+    "created_at": "2026-02-17T19:04:42Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43812/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43812",
+    "files_url": "https://github.com/huggingface/transformers/pull/44107/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44107",
     "labels": [],
     "merged": false,
-    "number": 43812,
+    "number": 44107,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix: Import torch.nn for type hints even when PyTorch is disabled",
-    "updated_at": "2026-02-07T09:35:29Z"
+    "state": "closed",
+    "title": "refactor(mra): use output tracing decorators",
+    "updated_at": "2026-02-18T21:19:29Z"
   },
   {
-    "additions": 6,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "The test `tests/models/pe_audio_video/test_modeling_pe_audio_video.py::PeAudioVideoEncoderTest::test_model_forward_default_config_values` is flaky in the CI. In local testing, it failed in 5 out of 100 runs for me. After some digging, I fi\u2026",
-    "changed_files": 2,
+    "additions": 47,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `YosoEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 5 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43811",
-    "created_at": "2026-02-06T19:46:15Z",
-    "deletions": 2,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44106",
+    "created_at": "2026-02-17T18:59:25Z",
+    "deletions": 132,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43811/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43811",
+    "files_url": "https://github.com/huggingface/transformers/pull/44106/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44106",
     "labels": [],
-    "merged": true,
-    "number": 43811,
+    "merged": false,
+    "number": 44106,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix video interpolation in pe_audio_video",
-    "updated_at": "2026-02-09T12:51:52Z"
+    "title": "Refactor yoso to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:30Z"
   },
   {
-    "additions": 10,
-    "author": "michaelfeil",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 4,
+    "additions": 39,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions` collection in `LiltEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 3 wrapper model classes, eliminating manual `return_dict` handlin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43810",
-    "created_at": "2026-02-06T18:24:13Z",
-    "deletions": 3,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44105",
+    "created_at": "2026-02-17T18:54:40Z",
+    "deletions": 127,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43810/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43810",
+    "files_url": "https://github.com/huggingface/transformers/pull/44105/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44105",
     "labels": [],
     "merged": false,
-    "number": 43810,
+    "number": 44105,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add bidirectional attention to qwen and llama configs",
-    "updated_at": "2026-02-07T17:40:34Z"
+    "title": "Refactor lilt to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:32Z"
   },
   {
-    "additions": 90,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "Trainer is going to be heavily refactored over the coming weeks/months (see #43595). To avoid the recurring pattern: PR merges in `transformers` \u2192 TRL CI breaks \u2192 follow-up PR in `transformers` to fix something we could have caught earlier\u2026",
+    "additions": 66,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary - Replace manual `hidden_states`/`attentions`/`cross_attentions` collection in `MegatronBertEncoder` with the `@capture_outputs` decorator and forward hooks - Add `@can_return_tuple` to all 8 wrapper model classes, eliminating m\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43809",
-    "created_at": "2026-02-06T17:05:13Z",
-    "deletions": 0,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44104",
+    "created_at": "2026-02-17T18:43:44Z",
+    "deletions": 207,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43809/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43809",
+    "files_url": "https://github.com/huggingface/transformers/pull/44104/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44104",
     "labels": [],
-    "merged": true,
-    "number": 43809,
+    "merged": false,
+    "number": 44104,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add TRL CI bot workflow to trigger tests on PR comments",
-    "updated_at": "2026-02-06T17:36:59Z"
+    "title": "Refactor megatron_bert to use automatic output tracing",
+    "updated_at": "2026-02-18T21:19:34Z"
   },
   {
-    "additions": 23,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "expands the base classes, inference, and training sections as these are quite important and will give readers a better idea of what's available",
-    "changed_files": 1,
+    "additions": 53,
+    "author": "engmohamedsalah",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #44052 Now and then, the indexer ran into trouble switching between masks and cache. Most of the test failures came from these hiccups: - Indexer cache: the old if seq_len > 1: reset cache heuristic broke assisted decoding (multi-tok\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43808",
-    "created_at": "2026-02-06T16:50:11Z",
-    "deletions": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44103",
+    "created_at": "2026-02-17T18:04:48Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43808/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43808",
+    "files_url": "https://github.com/huggingface/transformers/pull/44103/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44103",
     "labels": [],
-    "merged": true,
-    "number": 43808,
+    "merged": false,
+    "number": 44103,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[docs] reveal more in toctree",
-    "updated_at": "2026-02-06T22:29:26Z"
+    "title": "Fix glm_moe_dsa",
+    "updated_at": "2026-02-18T19:38:11Z"
   },
   {
-    "additions": 319,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR simplifies Trainer `__init__`: - Quantization validation extracted - PEFT unwrapping deduplicated - Liger Kernel extracted \u2014 apply_liger_kernel - Label smoother simplified - Validations grouped \u2014 `_validate_\u2026",
-    "changed_files": 5,
+    "additions": 42,
+    "author": "fumadari",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the `ibert` model to use the new `@capture_outputs` and `@can_return_tuple` decorators for output tracing, as part of the meta-issue #43979. **Key changes:** - Added `_can_record_outputs = {\"hidden_states\": IBertLayer,\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43807",
-    "created_at": "2026-02-06T16:18:12Z",
-    "deletions": 250,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44102",
+    "created_at": "2026-02-17T17:21:32Z",
+    "deletions": 154,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43807/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43807",
+    "files_url": "https://github.com/huggingface/transformers/pull/44102/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44102",
     "labels": [],
-    "merged": true,
-    "number": 43807,
-    "review_comments_count": 12,
+    "merged": false,
+    "number": 44102,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Refactor trainer init",
-    "updated_at": "2026-02-10T15:00:21Z"
+    "title": "Refactor ibert output tracing with capture_outputs",
+    "updated_at": "2026-02-18T21:19:35Z"
   },
   {
-    "additions": 915,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates TrainingArguments in several places: - More tests - Simplify the post_init - update and reorder __init__ For the review, just check that the post_init is correctly modified",
-    "changed_files": 3,
+    "additions": 210,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors XLM's output tracing to align with the standardized output capturing patterns used across the codebase. ### Key changes: - Refactors transformer blocks into a dedicated `XLMLayer` module to enable\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43806",
-    "created_at": "2026-02-06T16:16:45Z",
-    "deletions": 639,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44101",
+    "created_at": "2026-02-17T17:15:06Z",
+    "deletions": 194,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43806/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43806",
+    "files_url": "https://github.com/huggingface/transformers/pull/44101/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44101",
     "labels": [],
-    "merged": true,
-    "number": 43806,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "Update TrainingArguments",
-    "updated_at": "2026-02-24T12:32:04Z"
+    "merged": false,
+    "number": 44101,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[XLM] Refactor output tracing to align with capture_outputs standardized architecture",
+    "updated_at": "2026-02-19T08:08:33Z"
   },
   {
-    "additions": 31,
-    "author": "tarekziade",
+    "additions": 3,
+    "author": "qgallouedec",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The difference check was returning False 40% of the times and was reproducible locally tested with ``` pytest -svx tests/models/ernie4_5_moe/test_modeling_ernie4_5_moe.py -k test_load_balancing_loss --flake-finder `\u2026",
+    "body_excerpt": "In https://github.com/huggingface/trl/pull/5112 a user reported that `trl sft --help` fails It's because three inherited args from `TrainingArguments` (`torch_empty_cache_steps`, `gradient_checkpointing` and `use_liger_kernel`)help strings\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43804",
-    "created_at": "2026-02-06T14:24:50Z",
-    "deletions": 11,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44100",
+    "created_at": "2026-02-17T17:10:36Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43804/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43804",
+    "files_url": "https://github.com/huggingface/transformers/pull/44100/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44100",
     "labels": [],
     "merged": true,
-    "number": 43804,
-    "review_comments_count": 2,
+    "number": 44100,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(flaky): Different approach to make sure loss exists",
-    "updated_at": "2026-02-20T07:45:27Z"
+    "title": "Fix percentage formatting in help messages for gradient checkpointing, Liger Kernel, and empty cache steps",
+    "updated_at": "2026-02-20T09:57:51Z"
   },
   {
     "additions": 2,
-    "author": "vasqu",
+    "author": "qgallouedec",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, I merged #43772 after #43400 which caused this mismatch - causes red CI",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43803",
-    "created_at": "2026-02-06T14:14:58Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44099",
+    "created_at": "2026-02-17T16:45:35Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43803/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43803",
+    "files_url": "https://github.com/huggingface/transformers/pull/44099/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44099",
     "labels": [],
     "merged": true,
-    "number": 43803,
+    "number": 44099,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Repo Consistency`] Fix rms norm",
-    "updated_at": "2026-02-06T14:59:20Z"
+    "title": "Bump dev version",
+    "updated_at": "2026-02-18T10:03:54Z"
   },
   {
-    "additions": 7,
-    "author": "Sankalpkumarsingh1234",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR adds a short real-world use case section to help users understand practical applications of text summarization. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, y\u2026",
-    "changed_files": 1,
+    "additions": 125,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR refactors ViLT's output handling to align with the standardized patterns used across the codebase. Key changes: - Removes manual `hidden_states`/`attentions` propagation and passes `output_attentions`, `out\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43802",
-    "created_at": "2026-02-06T13:41:03Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44098",
+    "created_at": "2026-02-17T16:32:34Z",
+    "deletions": 138,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43802/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43802",
+    "files_url": "https://github.com/huggingface/transformers/pull/44098/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44098",
     "labels": [],
     "merged": false,
-    "number": 43802,
+    "number": 44098,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add real-world use case section for text summarization",
-    "updated_at": "2026-02-09T12:14:45Z"
+    "state": "open",
+    "title": "[ViLT] Refactor output handling to align with standardized patterns",
+    "updated_at": "2026-02-17T16:37:46Z"
   },
   {
-    "additions": 8,
-    "author": "robell",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "When using the torch.export path with this model we hit an issue on correction_coefs being modified with unknown side-effects the dynamo can't catch. So it lifts this tensor unnecessarily, and fails assuming mutation is needed RuntimeError\u2026",
+    "additions": 12,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The `test_keep_in_fp32_modules` issues in #44052 are because the test assumes a model has **either** `_keep_in_fp32_modules` or `_keep_in_fp32_modules_strict` **but not both.** The only model that uses both is `glm_moe_dsa`, so this is the\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43801",
-    "created_at": "2026-02-06T13:26:58Z",
-    "deletions": 4,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44097",
+    "created_at": "2026-02-17T15:43:55Z",
+    "deletions": 42,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43801/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43801",
+    "files_url": "https://github.com/huggingface/transformers/pull/44097/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44097",
     "labels": [],
     "merged": true,
-    "number": 43801,
-    "review_comments_count": 3,
+    "number": 44097,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix lifted_tensor in Gemma3n export which dynamo can't reason about",
-    "updated_at": "2026-02-12T09:58:33Z"
+    "title": "Merge test_keep_in_fp32_modules and test_keep_in_fp32_modules_strict",
+    "updated_at": "2026-02-17T16:23:33Z"
   },
   {
-    "additions": 13182,
-    "author": "harshang03",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR makes remove_unused_columns=True customizable for trainers that transform dataset columns before calling the model. It introduces an optional signature_columns argument on Trainer to control which dataset co\u2026",
-    "changed_files": 4,
+    "additions": 3,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Tests were written (and pass) on DGX A100, here are the values for our runners.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43800",
-    "created_at": "2026-02-06T13:07:32Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43800/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43800",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 43800,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44096",
+    "created_at": "2026-02-17T15:14:26Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44096/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44096",
+    "labels": [],
+    "merged": true,
+    "number": 44096,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add new Trainer utilities and documentation",
-    "updated_at": "2026-02-06T13:42:08Z"
+    "title": "[voxtral-realtime] update runner expected values ",
+    "updated_at": "2026-02-17T15:23:19Z"
   },
   {
-    "additions": 14,
-    "author": "kashif",
+    "additions": 43,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? With this change: https://github.com/huggingface/accelerate/pull/3916 there is no torch device mesh when sp_backend=\"deepspeed\", and transformers currently assumes it exists. That\u2019s why you get: ``` sp_group = self.\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? As per the title. The check that was added in https://github.com/huggingface/transformers/pull/43768 is wrong, as a missing weight would NOT be reinitialized in some cases! As for the pointers check, it is actually\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43799",
-    "created_at": "2026-02-06T13:03:20Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44095",
+    "created_at": "2026-02-17T14:33:22Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43799/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43799",
+    "files_url": "https://github.com/huggingface/transformers/pull/44095/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44095",
     "labels": [],
     "merged": true,
-    "number": 43799,
-    "review_comments_count": 3,
+    "number": 44095,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[Trainer] use deepspeed SP process group when Accelerate doesn\u2019t build a mesh",
-    "updated_at": "2026-02-06T16:15:28Z"
+    "title": "Fix loading logic issue",
+    "updated_at": "2026-04-03T04:52:35Z"
   },
   {
-    "additions": 15,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **BLOOM:** `batch_encode_plus()` method was removed from `PreTrainedTokenizerBase` in commit 05c0e1d390 (the \"rm slow tokenizers\" refactor, #4093\u2026",
-    "changed_files": 3,
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43798",
-    "created_at": "2026-02-06T13:00:13Z",
-    "deletions": 6,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44094",
+    "created_at": "2026-02-17T14:15:10Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43798/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43798",
+    "files_url": "https://github.com/huggingface/transformers/pull/44094/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44094",
     "labels": [],
-    "merged": true,
-    "number": 43798,
+    "merged": false,
+    "number": 44094,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(testing): Fix BLOOM tokenizer, CLAP audio features, and CLVP text tester usage in tests",
-    "updated_at": "2026-02-06T13:57:32Z"
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:07Z"
   },
   {
-    "additions": 88,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "Let's see if we can fix that flaky",
-    "changed_files": 10,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43797",
-    "created_at": "2026-02-06T12:14:02Z",
-    "deletions": 31,
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44093",
+    "created_at": "2026-02-17T14:15:07Z",
+    "deletions": 129,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43797/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43797",
+    "files_url": "https://github.com/huggingface/transformers/pull/44093/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44093",
     "labels": [],
     "merged": false,
-    "number": 43797,
+    "number": 44093,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "preventing I/O errors on closed streams in the `cli` helper",
-    "updated_at": "2026-02-06T15:03:26Z"
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:09Z"
   },
   {
-    "additions": 37,
-    "author": "hemanth678901-stack",
+    "additions": 79,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "This PR clarifies a common misconception around GPU memory usage during training, particularly when using the Adafactor optimizer with DeepSpeed. Several users expect Adafactor to always reduce peak GPU memory due to reduced optimizer stat\u2026",
-    "changed_files": 1,
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43796",
-    "created_at": "2026-02-06T11:55:33Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44092",
+    "created_at": "2026-02-17T14:15:04Z",
+    "deletions": 159,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43796/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43796",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44092/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44092",
+    "labels": [],
     "merged": false,
-    "number": 43796,
+    "number": 44092,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Clarify GPU memory expectations when using Adafactor with DeepSpeed",
-    "updated_at": "2026-02-06T13:37:49Z"
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-03-03T00:30:10Z"
   },
   {
-    "additions": 1464,
-    "author": "liu-jiaxuan",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
-    "changed_files": 20,
-    "cluster_id": "cluster-43098-11",
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
     "cluster_ids": [
-      "cluster-43098-11"
+      "cluster-43979-28"
     ],
     "cluster_role": "member",
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43795",
-    "created_at": "2026-02-06T11:22:56Z",
-    "deletions": 9,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44091",
+    "created_at": "2026-02-17T14:14:56Z",
+    "deletions": 146,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43795/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43795",
+    "files_url": "https://github.com/huggingface/transformers/pull/44091/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44091",
     "labels": [],
     "merged": false,
-    "number": 43795,
-    "review_comments_count": 38,
+    "number": 44091,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Model] Add PP-OCRv5_server_rec Model Support",
-    "updated_at": "2026-03-18T16:33:35Z"
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-03-03T00:30:11Z"
   },
   {
-    "additions": 178,
-    "author": "tarekziade",
+    "additions": 25,
+    "author": "itazap",
     "author_association": "MEMBER",
-    "body_excerpt": "This patch aims to reduce flakiness in CI tests. We identified two causes of nondeterministic behavior: - Some tests were not using a fixed RNG seed, which reduced determinism. - The cli tests were occasionally triggering I/O errors due to\u2026",
-    "changed_files": 28,
+    "body_excerpt": "bos and eos behaviour should match when updating post processor setting `add_bos_token=True` when `bos_token=None` should silently disables `add_bos_token`. (was already the behavior for `eos_token`)",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43794",
-    "created_at": "2026-02-06T09:54:59Z",
-    "deletions": 114,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44090",
+    "created_at": "2026-02-17T13:15:07Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43794/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43794",
+    "files_url": "https://github.com/huggingface/transformers/pull/44090/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44090",
     "labels": [],
     "merged": true,
-    "number": 43794,
-    "review_comments_count": 1,
+    "number": 44090,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Enforce manual seed to reduce flakiness",
-    "updated_at": "2026-02-06T16:30:31Z"
+    "title": "Update post proc",
+    "updated_at": "2026-02-18T15:34:18Z"
   },
   {
-    "additions": 5225,
-    "author": "liu-jiaxuan",
+    "additions": 113,
+    "author": "preetam1407",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
-    "changed_files": 15,
-    "cluster_id": "cluster-43098-11",
+    "body_excerpt": "Fixes #43979 ## Summary Refactor T5 to the standardized output tracing interface. ## Changes - Added `_can_record_outputs` on T5 encoder/decoder stack subclasses. - Added `@capture_outputs` on the base stack forward. - Added `@can_return_t\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
     "cluster_ids": [
-      "cluster-43098-11"
+      "cluster-43979-28"
     ],
     "cluster_role": "member",
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43793",
-    "created_at": "2026-02-06T09:34:48Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44089",
+    "created_at": "2026-02-17T11:37:18Z",
+    "deletions": 294,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43793/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43793",
+    "files_url": "https://github.com/huggingface/transformers/pull/44089/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44089",
     "labels": [],
     "merged": false,
-    "number": 43793,
-    "review_comments_count": 4,
+    "number": 44089,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Model] Add PP-OCRV5_mobile_rec Model Support",
-    "updated_at": "2026-03-19T10:50:04Z"
+    "title": "Refactor t5 output tracing",
+    "updated_at": "2026-02-17T13:45:23Z"
   },
   {
-    "additions": 25,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "When saving a tokenizer to a local directory and reloading it, the tokenizer type could change to an incorrect class (or fall back to TokenizersBackend) if the directory name contained a model type substring. Example: ```python tokenizer =\u2026",
-    "changed_files": 3,
+    "additions": 41,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Description Refactors GPT-2 model to use the standardized `@capture_outputs` and `@can_return_tuple` decorators, replacing manual output collection boilerplate. Part of #43979 ## Changes - **`GPT2PreTrainedModel`**: Added `_can_record_o\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43791",
-    "created_at": "2026-02-06T08:03:34Z",
-    "deletions": 24,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44088",
+    "created_at": "2026-02-17T11:32:42Z",
+    "deletions": 129,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44088/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44088",
+    "labels": [],
+    "merged": false,
+    "number": 44088,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Refactor GPT-2 output tracing with capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-17T11:41:32Z"
+  },
+  {
+    "additions": 16,
+    "author": "huyxdang",
+    "author_association": "NONE",
+    "body_excerpt": "### Summary Refactors the Mamba2 model to use the standardized output collection interface as part of #43979. ### Changes * **Standardized Output Mapping**: Added `_can_record_outputs` to `Mamba2PreTrainedModel` mapping `hidden_states` \u2192 `\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44087",
+    "created_at": "2026-02-17T11:30:25Z",
+    "deletions": 33,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43791/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43791",
+    "files_url": "https://github.com/huggingface/transformers/pull/44087/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44087",
     "labels": [],
-    "merged": true,
-    "number": 43791,
-    "review_comments_count": 12,
+    "merged": false,
+    "number": 44087,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Prevent AutoTokenizer type mismatch from directory name substrin\u2026",
-    "updated_at": "2026-02-17T13:23:42Z"
+    "title": "Refactor Mamba2 to use standardized output tracing",
+    "updated_at": "2026-03-11T02:08:22Z"
   },
   {
-    "additions": 6,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? So CircleCI will run with torch 2.10 CI is \u2705 see [here](https://app.circleci.com/pipelines/github/huggingface/transformers/163136/workflows/58ba2617-481b-4b4e-a456-d327806af088)",
-    "changed_files": 6,
+    "additions": 16,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Migrates **MGP-STR** to the standardized output collection interface using `@capture_outputs` and `@can_return_tuple` decorators. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": MgpstrLayer, \"attentio\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43790",
-    "created_at": "2026-02-06T07:58:04Z",
-    "deletions": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44086",
+    "created_at": "2026-02-17T11:21:22Z",
+    "deletions": 48,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44086/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44086",
+    "labels": [],
+    "merged": false,
+    "number": 44086,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[MGP-STR] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:22:25Z"
+  },
+  {
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the RemBERT model to use the new output tracing decorators (`@capture_outputs` and `@can_return_tuple`), replacing manual output collection boilerplate. ### Changes: - Added `@capture_outputs` decorator t\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44085",
+    "created_at": "2026-02-17T11:09:55Z",
+    "deletions": 108,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43790/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43790",
+    "files_url": "https://github.com/huggingface/transformers/pull/44085/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44085",
     "labels": [],
-    "merged": true,
-    "number": 43790,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "unpin torch for CircleCI",
-    "updated_at": "2026-02-06T11:48:30Z"
-  },
-  {
-    "additions": 268,
-    "author": "Jereshea",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR introduces a `token_latency` feature to the generation. When enabled, it allows users to capture the execution time of each generation step, facilitating the measurement of key performance metrics like **Tim\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43789",
-    "created_at": "2026-02-06T07:04:37Z",
-    "deletions": 52,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43789/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43789",
-    "labels": [
-      "Code agent slop"
-    ],
     "merged": false,
-    "number": 43789,
+    "number": 44085,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Support to measure TTFT & TPOT",
-    "updated_at": "2026-02-06T13:32:43Z"
+    "state": "open",
+    "title": "Refactor RemBERT to use output tracing decorators",
+    "updated_at": "2026-02-17T11:10:59Z"
   },
   {
-    "additions": 18,
-    "author": "LynchXLQ",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Fixes #43698 This PR adds support for `id` and `resume` parameters in SwanLabCallback, enabling experiment continuation when resuming training. ### Changes - Added `SWANLAB_RUN_ID` environment variable support for\u2026",
+    "additions": 37,
+    "author": "Zephyr-Blessed",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the GPT-J model to use the new `capture_outputs` and `can_return_tuple` decorators for output tracing, following the same pattern as #44046 (CodeGen). ### Changes: - Added `@capture_outputs` decorator on\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43788",
-    "created_at": "2026-02-06T02:29:19Z",
-    "deletions": 0,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44084",
+    "created_at": "2026-02-17T11:08:48Z",
+    "deletions": 108,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43788/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43788",
+    "files_url": "https://github.com/huggingface/transformers/pull/44084/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44084",
     "labels": [],
     "merged": false,
-    "number": 43788,
+    "number": 44084,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add id and resume support for SwanLab integration",
-    "updated_at": "2026-02-09T16:18:39Z"
+    "title": "[GPT-J] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:41:38Z"
   },
   {
-    "additions": 50,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR removes or replaces code to use APIs from PyTorch 2.4+.",
-    "changed_files": 24,
+    "additions": 2856,
+    "author": "3outeille",
+    "author_association": "MEMBER",
+    "body_excerpt": "This PR introduces **first-class FSDP2 (Fully Sharded Data Parallel v2) support** directly in Transformers, bypassing the need for Accelerate's FSDP wrapper. It covers the full lifecycle: model distribution, training, checkpointing, and CI\u2026",
+    "changed_files": 97,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43787",
-    "created_at": "2026-02-06T02:11:17Z",
-    "deletions": 95,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44083",
+    "created_at": "2026-02-17T10:57:06Z",
+    "deletions": 201,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43787/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43787",
+    "files_url": "https://github.com/huggingface/transformers/pull/44083/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44083",
     "labels": [],
-    "merged": true,
-    "number": 43787,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Remove unnecessary code or checks for PT 2.4+",
-    "updated_at": "2026-02-12T01:59:20Z"
+    "merged": false,
+    "number": 44083,
+    "review_comments_count": 24,
+    "state": "open",
+    "title": "FSDP2 native support in transformers ",
+    "updated_at": "2026-03-25T17:01:02Z"
   },
   {
-    "additions": 32,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Many users and tools use the OpenAI chat format for image inputs: `{\"type\": \"image_url\", \"image_url\": {\"url\": \"https://example.com/image.jpg\"}}` However, apply_chat_template only recognizes the HuggingFace-native format: `{\"type\": \"image\",\u2026",
-    "changed_files": 2,
+    "additions": 6,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44077. Indeed, the call is not optional. This is slightly breaking as the defaut used to be False, so fresh model instantiation will now use a different init\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43786",
-    "created_at": "2026-02-06T01:50:42Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44082",
+    "created_at": "2026-02-17T10:09:03Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43786/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43786",
+    "files_url": "https://github.com/huggingface/transformers/pull/44082/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44082",
     "labels": [],
     "merged": true,
-    "number": 43786,
+    "number": 44082,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add openAI style `image_url` content support in `apply_chat_template`",
-    "updated_at": "2026-02-12T09:48:14Z"
+    "title": "Fix patchtsmixer call to post_init",
+    "updated_at": "2026-02-17T11:05:40Z"
   },
   {
-    "additions": 85,
-    "author": "MengAiDev",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- Add _is_hf_initialized flag in _load_parameter_into_model to prevent unnecessary random initialization - Skip state_dict loading for non-rank0 processes when FSDP is enabled to avoid wasting CPU RAM - This fixes the issue where all ranks\u2026",
-    "changed_files": 3,
+    "additions": 48,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #42533 by introducing default flash implementations. cc @vasqu and @cyrilvallez",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43785",
-    "created_at": "2026-02-06T00:46:08Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43785/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43785",
-    "labels": [],
-    "merged": false,
-    "number": 43785,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "Fix FSDP_CPU_RAM_EFFICIENT_LOADING (#43749)",
-    "updated_at": "2026-02-11T11:17:53Z"
-  },
-  {
-    "additions": 55,
-    "author": "chry-santhemum",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? EDIT: see the discussion here https://github.com/huggingface/transformers/pull/43783#discussion_r2795768079 Add a check for deepspeed_zero3 in `_init_weights` for `nn.Embedding`. When initializing weights for `nn.Em\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43783",
-    "created_at": "2026-02-05T22:42:49Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44081",
+    "created_at": "2026-02-17T09:54:01Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43783/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43783",
+    "files_url": "https://github.com/huggingface/transformers/pull/44081/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44081",
     "labels": [],
-    "merged": false,
-    "number": 43783,
-    "review_comments_count": 5,
+    "merged": true,
+    "number": 44081,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "fix: ZeRO-3 crash for non-pretrained BERT in _init_weights",
-    "updated_at": "2026-02-18T03:24:28Z"
+    "title": "add default flash impl",
+    "updated_at": "2026-02-19T11:29:54Z"
   },
   {
-    "additions": 156,
-    "author": "gabe-l-hart",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Only overwrite the pretrained_model_name_or_path if needed with adapter The check is based on the assumption that if the current value is a path on disk and there is a `config.json` present in that path, the path points to a full model che\u2026",
-    "changed_files": 6,
+    "additions": 22,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None` Fixes #44079, follow-up from #44050. Essentially, it brings behaviour to the expected as described in #44079: > If I 1) initialize a\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 10,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43781",
-    "created_at": "2026-02-05T21:39:09Z",
-    "deletions": 4,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44080",
+    "created_at": "2026-02-17T09:53:36Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43781/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43781",
+    "files_url": "https://github.com/huggingface/transformers/pull/44080/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44080",
     "labels": [],
     "merged": true,
-    "number": 43781,
-    "review_comments_count": 7,
+    "number": 44080,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Load model with co-located adapter from local path (Granite Speech)",
-    "updated_at": "2026-03-05T14:14:43Z"
+    "title": "Patch `__setitem__` on `ModelOutput` even if the parameter was previously `None`",
+    "updated_at": "2026-02-20T10:08:38Z"
   },
   {
-    "additions": 2,
-    "author": "qgallouedec",
+    "additions": 19,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "The changes in #43711 caused the model to be never prepared when using DeepSpeed. When training you hit for example: ``` [rank0]: Traceback (most recent call last): [rank0]: File \"/fsx/qgallouedec/trl/trl/scripts/grpo.py\", line 193, in <mo\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? * Set `input_modalities` on various architectures that aren't just text Sentence Transformers would like to rely on `input_modalities` in the future to determine what modalities can be used. However, it's not quite\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43780",
-    "created_at": "2026-02-05T20:59:07Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44078",
+    "created_at": "2026-02-17T09:15:34Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43780/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43780",
+    "files_url": "https://github.com/huggingface/transformers/pull/44078/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44078",
     "labels": [],
     "merged": true,
-    "number": 43780,
-    "review_comments_count": 2,
+    "number": 44078,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": "Fix DeepSpeed model preparation logic in Trainer class",
-    "updated_at": "2026-02-06T19:20:01Z"
+    "title": "[`fix`] Set input_modalities on various architectures that aren't just text",
+    "updated_at": "2026-02-24T10:39:31Z"
   },
   {
-    "additions": 3,
-    "author": "surya10602",
+    "additions": 11,
+    "author": "mmahjoub5",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# Summary This PR updates `SwanLabCallback` to accept `**kwargs` in its `__init__` method and passes them to `swanlab.init()` during setup. Previously, the SwanLab integration did not expose important initialization arguments like `experim\u2026",
+    "body_excerpt": "# What does this PR do? This PR refactors the ImageGPT implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43779",
-    "created_at": "2026-02-05T20:57:14Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44076",
+    "created_at": "2026-02-17T08:46:55Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43779/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43779",
+    "files_url": "https://github.com/huggingface/transformers/pull/44076/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44076",
     "labels": [],
     "merged": false,
-    "number": 43779,
+    "number": 44076,
     "review_comments_count": 0,
     "state": "open",
-    "title": "SwanLab: Add support for id and resume arguments in SwanLabCallback",
-    "updated_at": "2026-02-09T09:43:49Z"
+    "title": "Refectored modeling_imagegpt.py to enable hooks to capture_outputs",
+    "updated_at": "2026-02-18T04:11:40Z"
   },
   {
-    "additions": 93,
-    "author": "kevinli573",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Initializes dt_bias in the mixer class for both Mamba-1 and Mamba-2 (also initialized in PreTrainedModel class for meta device test). Addresses [#43717](https://github.com/huggingface/transformers/issues/43717)",
-    "changed_files": 4,
+    "additions": 66,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates TextNet to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. It adds `_can_record_outputs`, applies `@capture_outputs` to `TextNetModel.for\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43778",
-    "created_at": "2026-02-05T20:38:41Z",
-    "deletions": 81,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44074",
+    "created_at": "2026-02-17T08:23:25Z",
+    "deletions": 52,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43778/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43778",
+    "files_url": "https://github.com/huggingface/transformers/pull/44074/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44074",
     "labels": [],
-    "merged": true,
-    "number": 43778,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "Mamba-1/-2 init weights in mixer class",
-    "updated_at": "2026-03-03T02:27:54Z"
+    "merged": false,
+    "number": 44074,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[TextNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:28:11Z"
   },
   {
-    "additions": 2,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 32,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates VisualBert to the new standardized output tracing system using the `@capture_outputs` and `@can_return_tuple` decorators. Specifically, this PR: - Adds `_can_record_outputs` to `VisualBertPreTraine\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43777",
-    "created_at": "2026-02-05T19:25:53Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44073",
+    "created_at": "2026-02-17T08:16:59Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43777/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43777",
+    "files_url": "https://github.com/huggingface/transformers/pull/44073/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44073",
     "labels": [],
-    "merged": true,
-    "number": 43777,
+    "merged": false,
+    "number": 44073,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Bump dev version",
-    "updated_at": "2026-02-06T08:03:39Z"
+    "state": "open",
+    "title": "[VisualBert] Refactor output tracing using capture_outputs and can_return_tuple decorators",
+    "updated_at": "2026-02-17T11:29:01Z"
   },
   {
-    "additions": 1598,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates and simplifies tests for trainer. We never really had any issues with those tests, just cleaning a bit. 1) `tests/trainer/test_data_collator.py` - Restructured from 4 large classes (PyTorch/NumPy \u00d7 I\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43776",
-    "created_at": "2026-02-05T18:44:30Z",
-    "deletions": 1907,
+    "additions": 12,
+    "author": "Siddhartha7340",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# Refactor efficientnet output tracing # What does this PR do? This Pull Request migrates the EfficientNet model to use the standardized @capture_outputs and @can_return_tuple decorators. - Added _can_record_outputs to `EfficientNetPreTrai\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44072",
+    "created_at": "2026-02-17T07:42:01Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43776/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43776",
+    "files_url": "https://github.com/huggingface/transformers/pull/44072/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44072",
     "labels": [],
-    "merged": true,
-    "number": 43776,
+    "merged": false,
+    "number": 44072,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Refactor trainer data_collator and callbacks tests",
-    "updated_at": "2026-02-06T16:06:16Z"
+    "state": "open",
+    "title": "refactor efficientnet output tracing with @capture_outputs and @can_r\u2026",
+    "updated_at": "2026-02-17T07:56:05Z"
   },
   {
-    "additions": 154,
-    "author": "Mr-Neutr0n",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fixes #43688 The auxiliary load balancing loss in MoE models was not correctly normalized when `top_k > 1`. The `tokens_per_expert` distribution (f_i) was summing to K instead of 1, while `router_prob_per_expert` (P_i) sums to 1\u2026",
-    "changed_files": 22,
+    "additions": 38,
+    "author": "ArivunidhiA",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Refactors the MPT model to use the new standardized output collection interface as part of #43979. ### Changes: - Added `_can_record_outputs` to `MptPreTrainedModel` mapping `hidden_states` \u2192 `MptBlock` and `attent\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43775",
-    "created_at": "2026-02-05T16:48:34Z",
-    "deletions": 66,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44071",
+    "created_at": "2026-02-17T07:19:17Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43775/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43775",
+    "files_url": "https://github.com/huggingface/transformers/pull/44071/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44071",
     "labels": [],
     "merged": false,
-    "number": 43775,
+    "number": 44071,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix(moe): normalize auxiliary loss by top_k for correct load balancing",
-    "updated_at": "2026-02-05T18:14:28Z"
+    "title": "[Refactor] Migrate MPT to standardized output tracing decorators",
+    "updated_at": "2026-02-17T07:20:17Z"
   },
   {
-    "additions": 930,
-    "author": "mbtariq82",
+    "additions": 272,
+    "author": "rudybear",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds a new feature to TrainingArguments and Trainer: activation_offloading. When enabled, intermediate activations are offloaded to CPU during training to reduce GPU memory usage, which is useful for large m\u2026",
-    "changed_files": 5,
+    "body_excerpt": "## Summary - Add GGUF config mapping, defaults, and tokenizer converter for `qwen3_next` (Qwen3-Coder-Next, hybrid DeltaNet+Attention MoE, 80B total / 3B active) - Add `Qwen3NextTensorProcessor` handling DeltaNet-specific tensor transforms\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43774",
-    "created_at": "2026-02-05T15:52:16Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44070",
+    "created_at": "2026-02-17T07:18:13Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43774/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43774",
+    "files_url": "https://github.com/huggingface/transformers/pull/44070/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44070",
     "labels": [],
     "merged": false,
-    "number": 43774,
+    "number": 44070,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add activation offloading to trainer",
-    "updated_at": "2026-03-04T20:47:27Z"
+    "state": "open",
+    "title": "Add GGUF loading support for Qwen3-Next (qwen3_next) architecture",
+    "updated_at": "2026-02-17T07:21:26Z"
   },
   {
-    "additions": 8,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We had to do this twice already!",
-    "changed_files": 3,
+    "additions": 26,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR uses ``torch.isfinite`` to simplify conditions, and the CUDA sync calls may also be reduced.",
+    "changed_files": 26,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43773",
-    "created_at": "2026-02-05T15:38:15Z",
-    "deletions": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44069",
+    "created_at": "2026-02-17T06:49:38Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43773/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43773",
+    "files_url": "https://github.com/huggingface/transformers/pull/44069/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44069",
     "labels": [],
     "merged": true,
-    "number": 43773,
+    "number": 44069,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix-release-ubild",
-    "updated_at": "2026-02-06T08:56:52Z"
+    "title": "Use torch.isfinite",
+    "updated_at": "2026-02-18T01:04:19Z"
   },
   {
-    "additions": 252,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, this led to weird dependencies where modeling files used direct imports",
-    "changed_files": 83,
+    "additions": 42,
+    "author": "mtthw13",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Replaces manual `output_attentions`/`output_hidden_states`/`return_dict` boilerplate in GPT-Neo with the hook-based decorator system. **Changes:** - Added `_can_record_outputs = {\"hidden_states\": GPTNeoBlock, \"attentions\": GPTNeoAttention}\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43772",
-    "created_at": "2026-02-05T15:29:21Z",
-    "deletions": 216,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44068",
+    "created_at": "2026-02-17T06:13:37Z",
+    "deletions": 119,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43772/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43772",
+    "files_url": "https://github.com/huggingface/transformers/pull/44068/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44068",
     "labels": [],
-    "merged": true,
-    "number": 43772,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "[`Modular Dependencies`] Fixup qwen rms norms",
-    "updated_at": "2026-02-06T12:30:17Z"
+    "merged": false,
+    "number": 44068,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor GPT-Neo to use `@capture_outputs` and `@can_return_tuple` decorators",
+    "updated_at": "2026-02-18T08:30:32Z"
   },
   {
-    "additions": 1525,
-    "author": "leoneperdigao",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary <!-- Add a brief summary of changes --> ## Related Issue Fixes #40170 **Issue:** Add MXFP4 MoE/attention backward kernels **URL:** https://github.com/huggingface/transformers/issues/40170 ## Problem ## A Call To Action! The Hugg\u2026",
-    "changed_files": 6,
+    "additions": 63,
+    "author": "23atharvaS",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR introduces a new argument `eval_on_end` to the `Trainer` class. When enabled, the Trainer automatically runs evaluation at the end of training. This allows users to obtain final evaluation metrics without e\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43771",
-    "created_at": "2026-02-05T15:12:21Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44067",
+    "created_at": "2026-02-17T05:25:26Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43771/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43771",
+    "files_url": "https://github.com/huggingface/transformers/pull/44067/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44067",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43771,
+    "number": 44067,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: Add MXFP4 MoE/attention backward kernels",
-    "updated_at": "2026-03-24T14:14:44Z"
+    "title": "Add `eval_on_end` argument to Trainer for final evaluation after training",
+    "updated_at": "2026-02-17T13:32:34Z"
   },
   {
-    "additions": 47,
-    "author": "lordaarush",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Removes the unconditional `self.state.train_batch_size = self._train_batch_size` assignment that was causing issues when resuming from checkpoint with different batch configurations. The `train_batch_size` should on\u2026",
+    "additions": 35,
+    "author": "Jay-IIT",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Migrate GPT-J from manual boilerplate output collection to the new decorator-based output tracing system: - Add `_can_record_outputs` to `GPTJPreTrainedModel` - Add `@capture_outputs` and `@merge_with_config_defaults` to `GPTJModel.forward\u2026",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43770",
-    "created_at": "2026-02-05T14:25:36Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43770/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43770",
-    "labels": [],
-    "merged": true,
-    "number": 43770,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Remove unconditional train_batch_size assignment",
-    "updated_at": "2026-02-06T14:47:16Z"
-  },
-  {
-    "additions": 3950,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds voxtral realtime! ## benchmarks Using [this reproducer](https://gist.github.com/eustlb/367f062f77a5971291fb5350763bea8d), I've ran WER evals on ami, librispeech and fleurs, with results Dataset | Original (vllm\u2026",
-    "changed_files": 21,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43769",
-    "created_at": "2026-02-05T14:17:52Z",
-    "deletions": 2,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43769/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43769",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": true,
-    "number": 43769,
-    "review_comments_count": 39,
-    "state": "closed",
-    "title": "Add Voxtral Realtime",
-    "updated_at": "2026-02-26T10:18:32Z"
-  },
-  {
-    "additions": 87,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Helps vLLM to bump to v5",
-    "changed_files": 6,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43768",
-    "created_at": "2026-02-05T14:04:02Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43768/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43768",
-    "labels": [],
-    "merged": true,
-    "number": 43768,
-    "review_comments_count": 10,
-    "state": "closed",
-    "title": "Fix init weights in remote code",
-    "updated_at": "2026-02-17T14:45:18Z"
-  },
-  {
-    "additions": 850,
-    "author": "XingweiDeng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 19,
-    "cluster_id": "cluster-43098-11",
+    "cluster_id": "cluster-43979-28",
     "cluster_ids": [
-      "cluster-43098-11"
+      "cluster-43979-28"
     ],
     "cluster_role": "member",
-    "comments_count": 14,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43767",
-    "created_at": "2026-02-05T13:54:13Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44066",
+    "created_at": "2026-02-17T05:12:11Z",
+    "deletions": 107,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43767/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43767",
+    "files_url": "https://github.com/huggingface/transformers/pull/44066/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44066",
     "labels": [],
-    "merged": true,
-    "number": 43767,
-    "review_comments_count": 116,
-    "state": "closed",
-    "title": "[Model] Add PP-Chart2Table Model Support",
-    "updated_at": "2026-03-19T19:12:37Z"
+    "merged": false,
+    "number": 44066,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactor GPT-J to use standardized output tracing (#43979)",
+    "updated_at": "2026-02-18T18:44:28Z"
   },
   {
-    "additions": 11,
-    "author": "hmellor",
-    "author_association": "MEMBER",
-    "body_excerpt": "`convert_rope_params_to_dict` assumes that `rope_theta` and `partial_rotary_factor` will be present in `kwargs`. This is only true if these parameters are not explicit arguments of the config class's `__init__` method. i.e. `convert_rope_p\u2026",
+    "additions": 21,
+    "author": "tysoncung",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Refactors the CTRL model to use the standardized output collection interface as part of #43979. ## Changes - Added `_can_record_outputs` to `CTRLPreTrainedModel` mapping `hidden_states` \u2192 `EncoderLayer` and `attentions` \u2192 `Multi\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43766",
-    "created_at": "2026-02-05T12:28:26Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44065",
+    "created_at": "2026-02-17T02:03:57Z",
+    "deletions": 76,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43766/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43766",
+    "files_url": "https://github.com/huggingface/transformers/pull/44065/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44065",
     "labels": [],
-    "merged": true,
-    "number": 43766,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 44065,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `convert_rope_params_to_dict` so it uses `rope_theta` from the config",
-    "updated_at": "2026-02-06T10:45:42Z"
+    "title": "Refactor CTRL model output tracing with capture_outputs and can_return_tuple",
+    "updated_at": "2026-02-25T00:49:18Z"
   },
   {
-    "additions": 728,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? ## The problem Currently, `check_model_inputs` needs to iterate on all modules and monkey-patch all needed submodule's `forward` on-the-fly, before restoring them afterwards. This brings 2 big issues: - It's NOT thr\u2026",
-    "changed_files": 121,
+    "additions": 57,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Description: Adds eval_on_end to TrainingArguments to trigger a final evaluation automatically after training finishes. Key Changes: TrainingArguments: Added eval_on_end boolean flag. Trainer.train: Logic to call evaluate() and merge metri\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43765",
-    "created_at": "2026-02-05T12:24:02Z",
-    "deletions": 565,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44064",
+    "created_at": "2026-02-17T01:10:31Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43765/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43765",
-    "labels": [],
-    "merged": true,
-    "number": 43765,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44064/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44064",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 44064,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[core] Faster and thread-safe `check_model_inputs` implementation",
-    "updated_at": "2026-02-20T08:32:16Z"
+    "title": "feat: implement eval_on_end to trigger evaluation after training",
+    "updated_at": "2026-02-17T13:32:40Z"
   },
   {
-    "additions": 18,
-    "author": "YangKai0616",
+    "additions": 229,
+    "author": "AutumnAurelium",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Background: PR https://github.com/huggingface/transformers/pull/43672 This PR modifies the default attention implementation in `modernbert`, switching away from `FlashAttention`. It also restores the related tests that were previously over\u2026",
-    "changed_files": 9,
+    "body_excerpt": "# What does this PR do? This brings the Arcee AFMoE architecture in line with other MoE models' implementation patterns since v5. It also adds integration testing using Trinity Nano. ## Before submitting - [ ] This PR fixes a typo or impro\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43764",
-    "created_at": "2026-02-05T11:02:05Z",
-    "deletions": 364,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44063",
+    "created_at": "2026-02-17T01:07:13Z",
+    "deletions": 150,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43764/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43764",
+    "files_url": "https://github.com/huggingface/transformers/pull/44063/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44063",
     "labels": [],
     "merged": true,
-    "number": 43764,
-    "review_comments_count": 2,
+    "number": 44063,
+    "review_comments_count": 6,
     "state": "closed",
-    "title": ":rotating_light: Modify ModernBERT's default attention implementation to stop using FA",
-    "updated_at": "2026-02-06T11:26:44Z"
+    "title": "Update AFMoE architecture to use v5-style MoE impl",
+    "updated_at": "2026-03-19T14:00:46Z"
   },
   {
-    "additions": 100,
+    "additions": 2,
     "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Tweaks to `AGENTS.md` to make AI tools more efficient",
+    "body_excerpt": "Reproduced locally with ``` pytest -q -m generate --random-order-bucket=none --flake-finder --flake-runs=200 tests/models/kosmos2/test_modeling_kosmos2.py -k test_assisted_decoding_matches_greedy_search ``` Root cause: - prepare_config_and\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43763",
-    "created_at": "2026-02-05T10:25:59Z",
-    "deletions": 26,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44061",
+    "created_at": "2026-02-16T22:08:48Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43763/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43763",
+    "files_url": "https://github.com/huggingface/transformers/pull/44061/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44061",
     "labels": [],
     "merged": true,
-    "number": 43763,
-    "review_comments_count": 5,
+    "number": 44061,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Improved agents",
-    "updated_at": "2026-02-06T08:29:53Z"
+    "title": "Fix: flaky `Kosmos2ModelTest` test",
+    "updated_at": "2026-02-18T14:23:30Z"
   },
   {
-    "additions": 41,
-    "author": "molbap",
-    "author_association": "MEMBER",
-    "body_excerpt": "In case of module reloading, we currently lose tracking hooks for hidden states and attentions . Widening the matching condition a bit. Should fix https://github.com/huggingface/transformers/issues/43761, also mentioned in https://github.c\u2026",
-    "changed_files": 2,
+    "additions": 44,
+    "author": "lakprigan",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary Migrates GPT2 to the standardized output collection interface as part of #43979. - Added `_can_record_outputs` to `GPT2PreTrainedModel` (including `cross_attentions` via `OutputRecorder` targeting the `crossattention` submodule)\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43762",
-    "created_at": "2026-02-05T10:15:43Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44059",
+    "created_at": "2026-02-16T20:14:30Z",
+    "deletions": 133,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43762/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43762",
+    "files_url": "https://github.com/huggingface/transformers/pull/44059/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44059",
     "labels": [],
     "merged": false,
-    "number": 43762,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": " Widen match condition for `_can_record_outputs`",
-    "updated_at": "2026-02-06T16:25:16Z"
+    "number": 44059,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[GPT2] Refactor output tracing to use capture_outputs/can_return_tuple decorators",
+    "updated_at": "2026-02-25T17:47:45Z"
   },
   {
-    "additions": 747,
-    "author": "Nitin75408",
+    "additions": 122,
+    "author": "engmohamedsalah",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Fixes issue #43746: when loading PEFT adapters from local directories, the code no longer overrides the local path with the hub path from the adapter config, preventing unnecessary hub downloads. Centralizes path re\u2026",
-    "changed_files": 6,
+    "body_excerpt": "## Summary Fixes #44052 \u2014 resolves 10 of 11 skipped tests for the `glm_moe_dsa` model. **Root causes fixed:** - **DSA indexer mask shape mismatch**: The attention mask was not properly normalized to 4D before being passed to the indexer an\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43760",
-    "created_at": "2026-02-05T09:31:54Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44058",
+    "created_at": "2026-02-16T19:24:30Z",
+    "deletions": 84,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43760/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43760",
+    "files_url": "https://github.com/huggingface/transformers/pull/44058/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44058",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43760,
+    "number": 44058,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fixed Models with PEFT adapters won't load from local checkpoints issue",
-    "updated_at": "2026-02-05T13:00:37Z"
+    "title": "Fix skipped tests for glm_moe_dsa model",
+    "updated_at": "2026-02-17T17:23:03Z"
   },
   {
-    "additions": 4,
-    "author": "kaixuanliu",
+    "additions": 0,
+    "author": "mariam851",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@IlyasMoutawwakil Hi, can you help review? Thx!",
-    "changed_files": 1,
+    "body_excerpt": "This PR addresses memory efficiency issues in the Qwen2Moe implementation (reported in #43856). Users experienced Out-of-Memory (OOM) errors during quantization and inference, particularly with large reserved memory (e.g., 27GB on H100) th\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43759",
-    "created_at": "2026-02-05T08:42:24Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44057",
+    "created_at": "2026-02-16T18:35:01Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43759/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43759",
+    "files_url": "https://github.com/huggingface/transformers/pull/44057/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44057",
     "labels": [],
-    "merged": true,
-    "number": 43759,
+    "merged": false,
+    "number": 44057,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add use_deterministic to guarantee the consistency for youtu-llm model",
-    "updated_at": "2026-02-05T13:14:18Z"
+    "title": "fix(qwen3_moe): optimize memory and fix OOM in MoE layers",
+    "updated_at": "2026-02-16T21:47:41Z"
   },
   {
-    "additions": 41,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@ydshieh can you help review? Thx!",
+    "additions": 50,
+    "author": "aman-coder03",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? This PR migrates MPNet to the new standardized output tracing system using the `@capture_outputs` decorator. Specifically, this PR: - Applies `@capture_outputs` to `MPNetModel.forward` - Removes manual accumulation\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43758",
-    "created_at": "2026-02-05T08:11:34Z",
-    "deletions": 11,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44056",
+    "created_at": "2026-02-16T18:27:05Z",
+    "deletions": 14,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43758/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43758",
+    "files_url": "https://github.com/huggingface/transformers/pull/44056/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44056",
     "labels": [],
-    "merged": true,
-    "number": 43758,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "stablize the expect output for eomt_dinov3 model test",
-    "updated_at": "2026-02-05T14:46:06Z"
+    "merged": false,
+    "number": 44056,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[MPNet] Refactor output tracing using capture_outputs decorator",
+    "updated_at": "2026-02-17T11:23:12Z"
   },
   {
-    "additions": 15,
-    "author": "TheSanjBot",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR avoids a hard failure when loading GGUF models that declare the `gpt-oss` architecture. Currently, such models raise a `ValueError` during GGUF config loading. This change maps `gpt-oss` to the closest suppo\u2026",
-    "changed_files": 2,
+    "additions": 5,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43757",
-    "created_at": "2026-02-05T07:56:19Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44055",
+    "created_at": "2026-02-16T18:26:43Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43757/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43757",
+    "files_url": "https://github.com/huggingface/transformers/pull/44055/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44055",
     "labels": [],
-    "merged": false,
-    "number": 43757,
+    "merged": true,
+    "number": 44055,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Avoid hard failure for gpt-oss GGUF architecture by falling back to g\u2026",
-    "updated_at": "2026-02-05T17:42:11Z"
+    "state": "closed",
+    "title": "Fix unprotected torch import",
+    "updated_at": "2026-02-16T18:43:01Z"
   },
   {
-    "additions": 27,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR refines the test file of ernie4_5_vl_moe model, mainly 2 things: 1. avoid hard cord for expected image token count 2. add padding_side=\"left\" for bs > 1 inference",
-    "changed_files": 2,
+    "additions": 346,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Add flash MLA interface. - It does not work I get a segfault - we don't leverage the paged cache so it's not as efficient as that I reckon. ```bash Fetching 6 files: 100%|\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43755",
-    "created_at": "2026-02-05T05:25:39Z",
-    "deletions": 9,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44054",
+    "created_at": "2026-02-16T18:07:14Z",
+    "deletions": 93,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44054/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44054",
+    "labels": [],
+    "merged": false,
+    "number": 44054,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Flash mla interface",
+    "updated_at": "2026-02-20T11:14:39Z"
+  },
+  {
+    "additions": 2,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do?",
+    "changed_files": 1,
+    "cluster_id": "cluster-44053-8",
+    "cluster_ids": [
+      "cluster-44053-8"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44053",
+    "created_at": "2026-02-16T17:59:48Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43755/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43755",
+    "files_url": "https://github.com/huggingface/transformers/pull/44053/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44053",
     "labels": [],
     "merged": true,
-    "number": 43755,
+    "number": 44053,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Ernie4 5 vl moe",
-    "updated_at": "2026-02-11T15:54:10Z"
+    "title": "Fix peft conversion typo",
+    "updated_at": "2026-02-17T11:12:19Z"
   },
   {
-    "additions": 0,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? https://github.com/pytorch/pytorch/issues/112577 was fixed in torch 2.2. Let's remove the workarounds and see what happen in the tests.",
-    "changed_files": 7,
+    "additions": 2,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix incorrect forward type hint for Gemma3n ## Details The type hint didn't match the actual returned class: https://github.com/huggingface/transformers/blob/349e00c1a367ce263624e525038250625dcf20c7/src/transforme\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43754",
-    "created_at": "2026-02-05T03:13:15Z",
-    "deletions": 52,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44051",
+    "created_at": "2026-02-16T17:26:24Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43754/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43754",
+    "files_url": "https://github.com/huggingface/transformers/pull/44051/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44051",
     "labels": [],
     "merged": true,
-    "number": 43754,
+    "number": 44051,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove SDPA workarounds for torch 2.4+",
-    "updated_at": "2026-02-05T14:44:16Z"
+    "title": "[`chore`] Fix incorrect forward type hint for Gemma3n",
+    "updated_at": "2026-02-20T09:08:07Z"
   },
   {
-    "additions": 1,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Update KERNELS_MIN_VERSION to 0.10.2 to be the same as setup.py",
-    "changed_files": 1,
+    "additions": 15,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Patch `get_text_features` for ChineseCLIP ### Details The `get_text_features` assumes that the `text_model` returns a `BaseModelOutputWithPooling`, just like is done with many other models. Currently, the `get_tex\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43753",
-    "created_at": "2026-02-05T03:02:43Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44050",
+    "created_at": "2026-02-16T17:23:31Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43753/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43753",
+    "files_url": "https://github.com/huggingface/transformers/pull/44050/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44050",
     "labels": [],
-    "merged": true,
-    "number": 43753,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 44050,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Update KERNELS_MIN_VERSION to 0.10.2 to be the same as setup.py",
-    "updated_at": "2026-02-11T12:24:15Z"
+    "title": "[`fix`] Patch `get_text_features` for ChineseCLIP",
+    "updated_at": "2026-02-17T09:55:17Z"
   },
   {
-    "additions": 0,
-    "author": "bhargav-patel-29",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR adds support for **Param-2-17B-MoE-A2.4B**, a large-scale Mixture-of-Experts (MoE) causal language model. Param-2-17B-MoE-A2.4B uses a **Hybrid Dense + MoE architecture** with 17B total parameters while acti\u2026",
-    "changed_files": 0,
+    "additions": 59,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `fnet` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of #43979. ### Changes: - Added `_can_record_outputs = {\"hidden_states\": FNetLayer}` to `FNetPreTrainedModel`\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43752",
-    "created_at": "2026-02-05T02:43:45Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44049",
+    "created_at": "2026-02-16T17:19:04Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43752/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43752",
+    "files_url": "https://github.com/huggingface/transformers/pull/44049/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44049",
     "labels": [],
     "merged": false,
-    "number": 43752,
+    "number": 44049,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Param2moe v4.52.3",
-    "updated_at": "2026-02-10T10:28:02Z"
+    "title": "Refactor fnet model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:13Z"
   },
   {
-    "additions": 152,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix more warnings in ruff 0.15 from existing enabled rules. Almost all changes are about turning `if A then A else B` into `A or B`.",
-    "changed_files": 40,
+    "additions": 4,
+    "author": "tomaarsen",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? * Fix up `__repr__` whitespace/brackets ## Reproducer ```python from transformers import AutoTokenizer, PreTrainedTokenizerBase # __repr__ via PreTrainedTokenizerBase tokenizer = AutoTokenizer.from_pretrained(\"bert-\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43751",
-    "created_at": "2026-02-05T02:08:42Z",
-    "deletions": 98,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44048",
+    "created_at": "2026-02-16T17:18:10Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43751/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43751",
+    "files_url": "https://github.com/huggingface/transformers/pull/44048/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44048",
     "labels": [],
-    "merged": false,
-    "number": 43751,
+    "merged": true,
+    "number": 44048,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix ruff warnings",
-    "updated_at": "2026-03-24T03:12:36Z"
+    "state": "closed",
+    "title": "[`simple`] Fix up `__repr__` whitespace/brackets",
+    "updated_at": "2026-02-20T10:03:34Z"
   },
   {
-    "additions": 40,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "enable tp in benchmark_v2, to ensure large model could run.",
-    "changed_files": 3,
+    "additions": 35,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `bloom` model to use the new `@capture_outputs` and `@can_return_tuple` decorators, as part of the effort in #43979. ### Changes: - Add `_can_record_outputs` dict to `BloomPreTrainedModel` mapping `hi\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43750",
-    "created_at": "2026-02-05T01:53:45Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44047",
+    "created_at": "2026-02-16T17:15:25Z",
+    "deletions": 104,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43750/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43750",
-    "labels": [],
-    "merged": true,
-    "number": 43750,
-    "review_comments_count": 6,
+    "files_url": "https://github.com/huggingface/transformers/pull/44047/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44047",
+    "labels": [],
+    "merged": false,
+    "number": 44047,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "enable tp for benchmark",
-    "updated_at": "2026-03-19T16:23:37Z"
+    "title": "Refactor bloom model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:14Z"
   },
   {
-    "additions": 1372,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Improve types parsing to handle \"|\" syntax and other edge cases, add much needed tests Cc @Cyrilvallez ;)",
-    "changed_files": 9,
+    "additions": 24,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `codegen` model to use the `@capture_outputs` and `@can_return_tuple` decorators, replacing manual `output_attentions`/`output_hidden_states`/`return_dict` handling. ### Changes: - Add `_can_record_ou\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43748",
-    "created_at": "2026-02-04T22:08:11Z",
-    "deletions": 227,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44046",
+    "created_at": "2026-02-16T17:07:38Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43748/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43748",
+    "files_url": "https://github.com/huggingface/transformers/pull/44046/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44046",
     "labels": [],
-    "merged": true,
-    "number": 43748,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44046,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[auto_docstring] Improve typing parsing and add tests",
-    "updated_at": "2026-02-25T18:42:39Z"
+    "title": "Refactor codegen model to use @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-02-17T14:15:23Z"
   },
   {
-    "additions": 52,
-    "author": "colldata79",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Title: Remove CompressedLinear support for compressed-tensors > 0.13 Body: ## What does this PR do? Prepares transformers for the removal of `CompressedLinear` from compressed-tensors (v0.14+). Users should now call `model.dequantize()` af\u2026",
-    "changed_files": 2,
+    "additions": 456215,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4939,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43747",
-    "created_at": "2026-02-04T21:25:10Z",
-    "deletions": 23,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43747/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43747",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44045",
+    "created_at": "2026-02-16T17:01:41Z",
+    "deletions": 591028,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/44045/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44045",
     "labels": [],
     "merged": false,
-    "number": 43747,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Remove CompressedLinear support for compressed-tensors > 0.13",
-    "updated_at": "2026-02-05T18:30:27Z"
+    "number": 44045,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Flash-mla-interface",
+    "updated_at": "2026-02-16T17:11:51Z"
   },
   {
-    "additions": 74,
-    "author": "antznette1",
+    "additions": 49,
+    "author": "rwtarpit",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43701 When resuming training via [Trainer.train(resume_from_checkpoint=...)](cci:1://file:///c:/Users/brass/OneDrive/Desktop/Work/clients/Anthonette/transformers/src/transformers/trainer.py:2070:4-2175:13), t\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43745",
-    "created_at": "2026-02-04T17:27:34Z",
-    "deletions": 4,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43745/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43745",
-    "labels": [
-      "Code agent slop"
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
     ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44044",
+    "created_at": "2026-02-16T16:43:19Z",
+    "deletions": 112,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44044/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44044",
+    "labels": [],
     "merged": false,
-    "number": 43745,
+    "number": 44044,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Trainer resume_from_checkpoint to apply checkpoint conversion mappings",
-    "updated_at": "2026-02-05T12:59:43Z"
+    "state": "open",
+    "title": "Refactor DeBERTa's output tracing interface",
+    "updated_at": "2026-02-16T18:57:29Z"
   },
   {
-    "additions": 635,
-    "author": "SunMarc",
+    "additions": 170,
+    "author": "IlyasMoutawwakil",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates a few minor things from trainer. Some of them are breaking but I think it should be safe to do as I don't think anyone is subclassing them or using them separately. Breaking: - move `propagate_args_t\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 31,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43744",
-    "created_at": "2026-02-04T17:16:20Z",
-    "deletions": 583,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44043",
+    "created_at": "2026-02-16T16:23:57Z",
+    "deletions": 162,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43744/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43744",
+    "files_url": "https://github.com/huggingface/transformers/pull/44043/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44043",
     "labels": [],
     "merged": true,
-    "number": 43744,
-    "review_comments_count": 9,
+    "number": 44043,
+    "review_comments_count": 15,
     "state": "closed",
-    "title": "Minor changes trainer",
-    "updated_at": "2026-02-17T11:47:46Z"
+    "title": "`grouped_mm` fallback",
+    "updated_at": "2026-02-23T13:58:09Z"
   },
   {
-    "additions": 2460,
-    "author": "itazap",
+    "additions": 1,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "**Update:** - improve sanitization of code pre-embedding - strip dtypes, args, params, etc. - filter self-contained model matches - improve summary (see below) - create prompt .md to create a modular file based on detector's results, that\u2026",
-    "changed_files": 4,
+    "body_excerpt": "Joao is regrettably no longer with us :saluting_face: so we should really stop getting users to ping him! This PR makes @cyrilvallez responsible for `generate` issues outside of VLMs.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43743",
-    "created_at": "2026-02-04T17:01:41Z",
-    "deletions": 242,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44042",
+    "created_at": "2026-02-16T16:00:36Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43743/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43743",
+    "files_url": "https://github.com/huggingface/transformers/pull/44042/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44042",
     "labels": [],
-    "merged": false,
-    "number": 43743,
+    "merged": true,
+    "number": 44042,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Modular playground",
-    "updated_at": "2026-03-24T13:09:46Z"
+    "state": "closed",
+    "title": "Update assignee for generate in bug report template",
+    "updated_at": "2026-02-16T16:09:19Z"
   },
   {
-    "additions": 7,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "This time I'm focusing on `_get_component_class_from_processor` in the tests. I'm not sure if the way this operates can sometimes result in Tokenizer classes going missing.",
-    "changed_files": 1,
+    "additions": 469,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Alternate PR to #43985 to be a reorder only PR. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, s\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43741",
-    "created_at": "2026-02-04T16:12:32Z",
-    "deletions": 2,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44041",
+    "created_at": "2026-02-16T15:40:41Z",
+    "deletions": 457,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43741/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43741",
+    "files_url": "https://github.com/huggingface/transformers/pull/44041/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44041",
     "labels": [],
-    "merged": false,
-    "number": 43741,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 44041,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "Make the CI Green Round 3",
-    "updated_at": "2026-02-04T17:11:01Z"
+    "title": "refactor _inner_training_loop to smaller methods",
+    "updated_at": "2026-02-23T16:52:09Z"
   },
   {
-    "additions": 3,
-    "author": "cyyever",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR removes more old PT code. ## Who can review? Anyone in the community is free to review the PR once the tests have passed. Feel free to tag members/contributors who may be interested in your PR. <!-- Your PR\u2026",
-    "changed_files": 1,
+    "additions": 366,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/44008 and re-enables tests",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43740",
-    "created_at": "2026-02-04T15:26:39Z",
-    "deletions": 30,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43740/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43740",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44040",
+    "created_at": "2026-02-16T12:43:28Z",
+    "deletions": 230,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44040/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44040",
     "labels": [],
-    "merged": false,
-    "number": 43740,
-    "review_comments_count": 4,
+    "merged": true,
+    "number": 44040,
+    "review_comments_count": 14,
     "state": "closed",
-    "title": "Remove more PyTorch < 2.4 code",
-    "updated_at": "2026-02-05T00:57:56Z"
+    "title": "Fix gemma3n `get_audio_features`",
+    "updated_at": "2026-02-19T12:50:00Z"
   },
   {
-    "additions": 27,
-    "author": "Rayyan-Oumlil",
+    "additions": 47,
+    "author": "itzyesse99-lgtm",
     "author_association": "NONE",
-    "body_excerpt": "Fixes #43698 ## Summary When using `Trainer` with SwanLab and resuming training (`trainer.train(resume_from_checkpoint=...)`), the integration previously had no way to pass `id` and `resume` to `swanlab.init()`, so a new experiment was alw\u2026",
+    "body_excerpt": "```diff diff --git a/transformers/modeling_utils.py b/transformers/modeling_utils.py index 1234567..8901234 100644 --- a/transformers/modeling_utils.py +++ b/transformers/modeling_utils.py @@ -10,6 +10,7 @@ from transformers import PreTrai\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43739",
-    "created_at": "2026-02-04T15:11:22Z",
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44039",
+    "created_at": "2026-02-16T12:01:26Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43739/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43739",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/44039/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44039",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43739,
-    "review_comments_count": 2,
+    "number": 44039,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "SwanLab: add id and resume support for resuming runs (fixes #43698)",
-    "updated_at": "2026-02-21T17:54:07Z"
+    "title": "AI Fix for #43979",
+    "updated_at": "2026-03-14T12:34:32Z"
   },
   {
-    "additions": 708,
-    "author": "SunMarc",
+    "additions": 23,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR do the following: - Created src/transformers/trainer_optimizer.py with: - Helper functions for each optimizer category - Updated `Trainer.get_optimizer_cls_and_kwargs` - Simplify tests",
+    "body_excerpt": "# What does this PR do? As per the title. Supersedes https://github.com/huggingface/transformers/pull/43913",
     "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43738",
-    "created_at": "2026-02-04T15:09:30Z",
-    "deletions": 767,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44037",
+    "created_at": "2026-02-16T11:02:12Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43738/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43738",
+    "files_url": "https://github.com/huggingface/transformers/pull/44037/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44037",
     "labels": [],
     "merged": true,
-    "number": 43738,
-    "review_comments_count": 4,
+    "number": 44037,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "[Trainer] Move optimizer cls init to trainer_optimizer.py",
-    "updated_at": "2026-02-10T14:26:45Z"
+    "title": "Add a dim check mechanism in Transpose and fix qwen3_vl_moe weight mapping",
+    "updated_at": "2026-02-16T16:01:12Z"
   },
   {
-    "additions": 197,
-    "author": "riccardofelluga",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Adds support for PyTorch's `associative_scan` higher-order op in Mamba and RecurrentGemma models, enabling parallel computation of the recurrent scan during `torch.compile()`. - Uses `associative_scan` when availabl\u2026",
-    "changed_files": 8,
+    "additions": 0,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? So the following logic added in a previous PR #44033 could take effect ```python # `include_all` is `True` when the CI is running on a pull request, so it treats all failing tests # in the current CI run as \"new fai\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 40,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43737",
-    "created_at": "2026-02-04T15:06:43Z",
-    "deletions": 31,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44036",
+    "created_at": "2026-02-16T10:14:54Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43737/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43737",
+    "files_url": "https://github.com/huggingface/transformers/pull/44036/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44036",
     "labels": [],
     "merged": true,
-    "number": 43737,
-    "review_comments_count": 4,
+    "number": 44036,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Use associative scan HOP mamba recurrentgemma",
-    "updated_at": "2026-02-25T17:06:20Z"
+    "title": "Remove `other_workflow_run_ids` for `issue_comment` in `utils/notification_service.py`",
+    "updated_at": "2026-02-16T10:24:07Z"
   },
   {
-    "additions": 155,
-    "author": "SunMarc",
+    "additions": 25,
+    "author": "ArthurZucker",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR do the following: - Extract `sorted_checkpoints` and `rotate_checkpoints` from Trainer class to standalone functions in `trainer_utils.py` - Rename `sorted_checkpoints` to `sort_checkpoints` - Simplify imple\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? We already brought it back with: ```python if clean_up_tokenization_spaces: # Call custom cleanup method if it exists (e.g., for CLVP's [SPACE] token replacement) if hasattr(self, \"clean_up_tokenization\") and callab\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43736",
-    "created_at": "2026-02-04T14:28:04Z",
-    "deletions": 67,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44035",
+    "created_at": "2026-02-16T09:49:28Z",
+    "deletions": 112,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43736/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43736",
+    "files_url": "https://github.com/huggingface/transformers/pull/44035/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44035",
     "labels": [],
     "merged": true,
-    "number": 43736,
-    "review_comments_count": 3,
+    "number": 44035,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Trainer] Move sort and rotate checkpoints to standalone functions",
-    "updated_at": "2026-02-05T15:26:50Z"
+    "title": "bring back our demons: clean_up_tokenization_spaces",
+    "updated_at": "2026-02-20T14:50:18Z"
   },
-  {
-    "additions": 10,
-    "author": "Rocketknight1",
+  {
+    "additions": 18,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "One of the recurring issues we see in the CI is tokenizers occasionally being initialized as instances of `TokenizersBackend` rather than the actual tokenizer class. The error crops up all over, and is annoyingly hard to track down because\u2026",
-    "changed_files": 4,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43735",
-    "created_at": "2026-02-04T14:23:46Z",
-    "deletions": 1,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43488-10",
+    "cluster_ids": [
+      "cluster-43488-10"
+    ],
+    "cluster_role": "member",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44034",
+    "created_at": "2026-02-16T08:04:20Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43735/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43735",
+    "files_url": "https://github.com/huggingface/transformers/pull/44034/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44034",
     "labels": [],
     "merged": false,
-    "number": 43735,
+    "number": 44034,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Chase TokenizersBackend issue",
-    "updated_at": "2026-02-06T15:18:10Z"
+    "title": "don't merge check workflow",
+    "updated_at": "2026-02-16T10:52:50Z"
   },
   {
-    "additions": 2643,
-    "author": "zucchini-nlp",
+    "additions": 143,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title. lays ground to unifying 3D position ids in qwen-style VLMs PR adds a single entrypoint to prepare position ids in `GenerationMixin` which models can override if needed (qwen-vl for ex). This allow user\u2026",
-    "changed_files": 36,
+    "body_excerpt": "# What does this PR do? Improve new failing test analysis for PR comment CI",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 25,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43734",
-    "created_at": "2026-02-04T11:12:26Z",
-    "deletions": 2693,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44033",
+    "created_at": "2026-02-16T07:30:33Z",
+    "deletions": 49,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43734/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43734",
+    "files_url": "https://github.com/huggingface/transformers/pull/44033/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44033",
     "labels": [],
     "merged": true,
-    "number": 43734,
-    "review_comments_count": 34,
+    "number": 44033,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Prepare and keep track of position ids in `generate`",
-    "updated_at": "2026-02-12T09:57:20Z"
+    "title": "Improve new failing test analysis for PR comment CI",
+    "updated_at": "2026-02-16T08:02:16Z"
   },
   {
     "additions": 3,
-    "author": "nesjett",
+    "author": "JJJYmmm",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Summary This PR removes the deprecated tokenizer parameter from the Quicktour documentation examples. As of the v5.0.0 release, the tokenizer argument was officially replaced from the Trainer constructor. Currently, the documentation exa\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? Update FP8 expert replacement to use `model.config.text_config` when available (VLMs), falling back to model.config if it's text-only models.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43733",
-    "created_at": "2026-02-04T10:28:20Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44032",
+    "created_at": "2026-02-16T06:02:28Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43733/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43733",
+    "files_url": "https://github.com/huggingface/transformers/pull/44032/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44032",
     "labels": [],
     "merged": true,
-    "number": 43733,
+    "number": 44032,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "Docs: fix Training step by removing tokenizer from trainer initialization",
-    "updated_at": "2026-02-04T16:49:59Z"
+    "title": "[Misc][vlms] Use text_config when initializing the fine-grained FP8Expert",
+    "updated_at": "2026-02-19T10:28:31Z"
   },
   {
-    "additions": 37,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? We need to move `get_module_size_with_ties` in transformers as the min version of accelerate 1.1.0 don't have this function. It was introduced in 1.2.0 actually . Another solution would be change the minimum version\u2026",
+    "additions": 11,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`dpr` model as per #43979 cc @molbap <img width=\"853\" height=\"323\" alt=\"Screenshot 2026-02-16 at 9 13 30 AM\" src=\"https://github.com/user-attachments/assets/d658f1d0-75e8-4eac-8a12-9aeddf194dde\" />",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43732",
-    "created_at": "2026-02-04T09:58:11Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44030",
+    "created_at": "2026-02-16T03:44:19Z",
+    "deletions": 58,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43732/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43732",
+    "files_url": "https://github.com/huggingface/transformers/pull/44030/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44030",
     "labels": [],
-    "merged": true,
-    "number": 43732,
+    "merged": false,
+    "number": 44030,
+    "review_comments_count": 1,
+    "state": "open",
+    "title": "refactor output tracing in `dpr`",
+    "updated_at": "2026-02-17T07:46:00Z"
+  },
+  {
+    "additions": 21,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`rwkv` model as per #43979 cc @molbap <img width=\"856\" height=\"333\" alt=\"Screenshot 2026-02-16 at 9 06 34 AM\" src=\"https://github.com/user-attachments/assets/9c8c5d41-ffbd-45f6-8b9b-1429bcb14543\" />",
+    "changed_files": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44029",
+    "created_at": "2026-02-16T03:37:13Z",
+    "deletions": 55,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44029/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44029",
+    "labels": [],
+    "merged": false,
+    "number": 44029,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix accelerate integration import ",
-    "updated_at": "2026-02-04T14:57:51Z"
+    "state": "open",
+    "title": "refactor output tracing in `rwkv`",
+    "updated_at": "2026-02-17T07:47:02Z"
   },
   {
-    "additions": 3,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43716, as per title We didn't see errors even if we test `bf16` because most backbone models (e.g. SigLIP) cast pixels to target dtype inside the model code.\u2026",
+    "additions": 13,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`superpoint` model as per #43979 cc @molbap <img width=\"857\" height=\"334\" alt=\"Screenshot 2026-02-16 at 8 53 43 AM\" src=\"https://github.com/user-attachments/assets/17781b76-743b-4b38-923a-8db3b94ccd01\" />",
     "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43731",
-    "created_at": "2026-02-04T09:27:59Z",
-    "deletions": 2,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44028",
+    "created_at": "2026-02-16T03:25:14Z",
+    "deletions": 46,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43731/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43731",
+    "files_url": "https://github.com/huggingface/transformers/pull/44028/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44028",
     "labels": [],
-    "merged": true,
-    "number": 43731,
+    "merged": false,
+    "number": 44028,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix dtype in image-text-to-text pipe",
-    "updated_at": "2026-02-04T13:54:52Z"
+    "state": "open",
+    "title": "refactor output tracing for `superpoint`",
+    "updated_at": "2026-02-17T07:46:06Z"
   },
   {
-    "additions": 127,
-    "author": "ArthurZucker",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? ## Fix GPT-OSS ??? EP sharding works, the forward is broken for eager (does not work) and output giberissh for grouped_mm. Will work on a fix to have in the patch! ## Fix any TP model - fix get shard tensor - fix ge\u2026",
-    "changed_files": 16,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43730",
-    "created_at": "2026-02-04T09:23:04Z",
-    "deletions": 80,
+    "additions": 6,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `speech_encoder_decoder` model as per #43979 cc @molbap <img width=\"852\" height=\"335\" alt=\"Screenshot 2026-02-16 at 8 44 05 AM\" src=\"https://github.com/user-attachments/assets/ee25c72b-b995-403c-b47b-3e9cbae0d2cc\" />",
+    "changed_files": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44027",
+    "created_at": "2026-02-16T03:14:41Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43730/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43730",
+    "files_url": "https://github.com/huggingface/transformers/pull/44027/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44027",
     "labels": [],
-    "merged": true,
-    "number": 43730,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "Fix EP post merge",
-    "updated_at": "2026-02-05T15:23:41Z"
+    "merged": false,
+    "number": 44027,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing in `speech_encoder_decoder`",
+    "updated_at": "2026-02-17T09:04:35Z"
   },
   {
-    "additions": 15,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes a bug regarding the doc tests. It also enables the doc tests for Eomt-DINOv3. ## Summary The `SKIP_CUDA_DOCTEST` environment variable check in `src/transformers/testing_utils.py` has a bug that causes\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 12,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`vision_encoder_decoder` model as per #43979 cc @molbap <img width=\"849\" height=\"333\" alt=\"Screenshot 2026-02-16 at 8 28 20 AM\" src=\"https://github.com/user-attachments/assets/9f511a17-947b-46ed-82a8-8bb9bb103f15\" />",
+    "changed_files": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43729",
-    "created_at": "2026-02-04T09:14:42Z",
-    "deletions": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44026",
+    "created_at": "2026-02-16T02:59:14Z",
+    "deletions": 22,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43729/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43729",
+    "files_url": "https://github.com/huggingface/transformers/pull/44026/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44026",
     "labels": [],
-    "merged": true,
-    "number": 43729,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "[Doc tests] Fix bug",
-    "updated_at": "2026-02-13T09:17:44Z"
+    "merged": false,
+    "number": 44026,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `vision_encoder_decoder`",
+    "updated_at": "2026-02-17T09:05:22Z"
   },
   {
-    "additions": 10,
-    "author": "kaixuanliu",
+    "additions": 7,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "In qwen2_5_vl model's test file, we use `Qwen/Qwen2.5-VL-7B-Instruct` model, which its default generation config's `do_sample` param is `True`. This causes some of the test cases are not steady, they will pass sometimes while sometimes get\u2026",
+    "body_excerpt": "This PR refactors the `depth_anything` model as per #43979 cc @molbap <img width=\"840\" height=\"330\" alt=\"Screenshot 2026-02-16 at 8 25 01 AM\" src=\"https://github.com/user-attachments/assets/fe7770be-70cb-4343-accb-7407c6bbb4f8\" />",
+    "changed_files": 2,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44025",
+    "created_at": "2026-02-16T02:56:17Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44025/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44025",
+    "labels": [],
+    "merged": false,
+    "number": 44025,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `depth_anything`",
+    "updated_at": "2026-02-17T07:46:31Z"
+  },
+  {
+    "additions": 15,
+    "author": "mmahjoub5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR refactors the FocalNet implementation to adopt the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of https://github.com/huggingface/transformers/issues/\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43728",
-    "created_at": "2026-02-04T07:55:12Z",
-    "deletions": 11,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44024",
+    "created_at": "2026-02-15T23:48:12Z",
+    "deletions": 60,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/44024/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44024",
+    "labels": [],
+    "merged": false,
+    "number": 44024,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Focalnet standardized outputs",
+    "updated_at": "2026-02-17T08:47:48Z"
+  },
+  {
+    "additions": 32,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the Nystromformer model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Add `_can_record_outputs` on `Nystromform\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44023",
+    "created_at": "2026-02-15T21:53:48Z",
+    "deletions": 122,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43728/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43728",
+    "files_url": "https://github.com/huggingface/transformers/pull/44023/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44023",
     "labels": [],
-    "merged": true,
-    "number": 43728,
+    "merged": false,
+    "number": 44023,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add `do_sample=False` to qwen2_5_vl model tests to stablize the output",
-    "updated_at": "2026-02-09T09:12:47Z"
+    "title": "Refactor Nystromformer output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:15Z"
   },
   {
-    "additions": 3,
-    "author": "ExcitingFrog",
+    "additions": 57,
+    "author": "ManasVardhan",
     "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? Restore backward-compatible TensorBoard logging directory behavior. The callback now prefers `TENSORBOARD_LOGGING_DIR` when set, but falls back to `TrainingArguments.logging_dir` if the env var is not provided, pres\u2026",
+    "body_excerpt": "## What does this PR do? Refactors the ConvBERT model to use the new `@capture_outputs` and `@can_return_tuple` decorators for standardized output collection, as part of #43979. ### Changes: - Move `ConvBertPreTrainedModel` after layer def\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43727",
-    "created_at": "2026-02-04T07:11:33Z",
-    "deletions": 0,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44022",
+    "created_at": "2026-02-15T21:49:57Z",
+    "deletions": 152,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43727/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43727",
+    "files_url": "https://github.com/huggingface/transformers/pull/44022/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44022",
     "labels": [],
     "merged": false,
-    "number": 43727,
+    "number": 44022,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "feat: Add legacy TrainingArguments logging_dir fallback for TensorBoard",
-    "updated_at": "2026-02-04T13:37:44Z"
+    "title": "Refactor ConvBERT output tracing with @capture_outputs and @can_return_tuple decorators",
+    "updated_at": "2026-03-03T00:30:17Z"
   },
   {
-    "additions": 1381,
-    "author": "3outeille",
-    "author_association": "MEMBER",
-    "body_excerpt": "Let's make sure it works for decoder only first (We skip VLM + Encoder-decoder for now) Introduction, forward, backward, generation (with convert mapping triggering) test agains TP vs non-TP baseline ```python from transformers import Auto\u2026",
-    "changed_files": 77,
+    "additions": 22,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43906 (related to #38071) ### Problem When using `pipeline('text-generation')` with batched inference on Qwen3 (and other models where `pad_token_id == bos_token_id`), a spurious warning is emitted: > A deco\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 27,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43722",
-    "created_at": "2026-02-03T23:08:40Z",
-    "deletions": 796,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44021",
+    "created_at": "2026-02-15T21:45:58Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43722/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43722",
+    "files_url": "https://github.com/huggingface/transformers/pull/44021/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44021",
     "labels": [],
     "merged": true,
-    "number": 43722,
-    "review_comments_count": 84,
+    "number": 44021,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "\ud83d\udea8 fix + tests dense & MoE TP all reduce (decoder only)",
-    "updated_at": "2026-03-04T16:06:14Z"
+    "title": "Fix false positive right-padding warning for decoder-only models in pipeline",
+    "updated_at": "2026-02-17T10:41:32Z"
   },
   {
-    "additions": 88,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 Fixes `tests/models/bitnet/test_modeling_bitnet.py::BitNetIntegrationTest::test_model_generation && tests/models/bitnet/test_modeling_bitnet.py::BitNetIntegrationTest::test_model_logits`. \u2192 The reasoning; picked\u2026",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43721",
-    "created_at": "2026-02-03T20:01:08Z",
-    "deletions": 0,
+    "additions": 28,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## Summary Refactors the DeBERTa model to use the standardized output collection interface with `@capture_outputs` and `@can_return_tuple` decorators. ### Changes - Add `_can_record_outputs` on `DebertaPreTrainedModel` mapping `hidden_stat\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44020",
+    "created_at": "2026-02-15T21:39:17Z",
+    "deletions": 129,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43721/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43721",
+    "files_url": "https://github.com/huggingface/transformers/pull/44020/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44020",
     "labels": [],
-    "merged": true,
-    "number": 43721,
-    "review_comments_count": 8,
+    "merged": false,
+    "number": 44020,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Unpack BitNet packed weights to fix CI failure",
-    "updated_at": "2026-02-16T15:38:50Z"
+    "title": "Refactor DeBERTa output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:21Z"
   },
   {
-    "additions": 16,
-    "author": "i-pj",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Fixes #43698 - Add SWANLAB_RUN_ID env var to pass experiment ID for resumption - Add SWANLAB_RESUME env var to control resume mode - Default to resume='allow' when resume_from_checkpoint is set\" # What does this PR do? Fixes # (issue) ## B\u2026",
+    "additions": 17,
+    "author": "Sid-V5",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Refactored the `resnet` model to use the standardized output tracing decorators (`@capture_outputs` and `@can_return_tuple`) as part of the migration ### Changes | File | Change | |------|--------| | `modeling_resnet.py` | Migrated to `@ca\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43719",
-    "created_at": "2026-02-03T18:59:54Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44019",
+    "created_at": "2026-02-15T19:53:19Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43719/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43719",
+    "files_url": "https://github.com/huggingface/transformers/pull/44019/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44019",
     "labels": [],
-    "merged": true,
-    "number": 43719,
+    "merged": false,
+    "number": 44019,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add id and resume support to SwanLab integration",
-    "updated_at": "2026-02-09T10:41:04Z"
+    "state": "open",
+    "title": "Refactor `resnet` to use `@capture_outputs` / `@can_return_tuple` output tracing",
+    "updated_at": "2026-02-15T20:01:23Z"
   },
   {
-    "additions": 6,
-    "author": "lordaarush",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# Fix Trainer resume_from_checkpoint incorrectly calculating max_steps when changing batch size ## What does this PR do? When resuming training from a checkpoint with a different `per_device_train_batch_size` but the same global batch size\u2026",
-    "changed_files": 1,
+    "additions": 41,
+    "author": "yashbora9",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "## Summary - Migrates `gpt_neo` to the standardized output collection interface as part of #43979 - Adds `@capture_outputs` decorator on `GPTNeoModel.forward` (base model) - Adds `@can_return_tuple` decorator on all wrapper model forwards\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43718",
-    "created_at": "2026-02-03T18:37:08Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44018",
+    "created_at": "2026-02-15T19:35:06Z",
+    "deletions": 109,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43718/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43718",
+    "files_url": "https://github.com/huggingface/transformers/pull/44018/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44018",
     "labels": [],
     "merged": false,
-    "number": 43718,
+    "number": 44018,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix resume_from_checkpoint ignoring user's batch size configuration",
-    "updated_at": "2026-02-06T15:14:33Z"
+    "state": "open",
+    "title": "Refactor GPT-Neo output tracing to use capture_outputs/can_return_tuple",
+    "updated_at": "2026-02-16T20:33:37Z"
   },
   {
-    "additions": 2,
-    "author": "stromfee",
-    "author_association": "NONE",
-    "body_excerpt": "AgentMarket.cloud - B2A marketplace with 28M+ real energy records. Ohne Strom keine KI. \u26a1 https://agentmarket.cloud",
+    "additions": 13,
+    "author": "nexiouscaliver",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR refactors \\`SegformersPreTrainedModel\\` and \\`SegformersForImageClassification\\` to use standardized \\`@capture_outputs\\` and \\`@can_return_tuple\\` decorators for automatic output collection. ### Changes 1. **Imported \\`@capture_ou\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43715",
-    "created_at": "2026-02-03T17:31:54Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44017",
+    "created_at": "2026-02-15T19:27:22Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43715/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43715",
+    "files_url": "https://github.com/huggingface/transformers/pull/44017/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44017",
     "labels": [],
     "merged": false,
-    "number": 43715,
+    "number": 44017,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Add AgentMarket - Real Energy Data for AI Agents",
-    "updated_at": "2026-02-04T12:45:23Z"
+    "state": "open",
+    "title": "Refactor output tracing in segformers (#43979)",
+    "updated_at": "2026-02-20T16:51:42Z"
   },
   {
-    "additions": 137,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do ? This PR refactor NEFTune implementation from Trainer methods to standalone functions. Part of PRs to unbloat Trainer.",
-    "changed_files": 5,
+    "additions": 95,
+    "author": "akashadsare",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "This PR migrates GPT-2 and its derivatives (GPTBigCode, Decision Transformer) to the new standardized output collection interface using the [@capture_outputs](vscode-file://vscode-app/usr/share/code/resources/app/out/vs/code/electron-brows\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43714",
-    "created_at": "2026-02-03T16:35:29Z",
-    "deletions": 74,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44015",
+    "created_at": "2026-02-15T18:07:11Z",
+    "deletions": 231,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43714/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43714",
+    "files_url": "https://github.com/huggingface/transformers/pull/44015/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44015",
     "labels": [],
-    "merged": true,
-    "number": 43714,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[Trainer] Move NEFTune impl to standalone functions",
-    "updated_at": "2026-02-05T18:58:14Z"
+    "merged": false,
+    "number": 44015,
+    "review_comments_count": 4,
+    "state": "open",
+    "title": "Refactor GPT2-based models to standardized output collection interface",
+    "updated_at": "2026-02-15T18:13:56Z"
   },
   {
-    "additions": 3,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "The generate tests that compare prompt lookup or speculative decoding to the base model have an extremely high rate of flakiness, I guess because of inherent non-determinism. The actual generation works, but the test frequently sees diverg\u2026",
-    "changed_files": 1,
+    "additions": 45,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43992 by preventing a false missing-key report for `UMT5EncoderModel` when `encoder.embed_tokens.weight` is tied to `shared.weight`. `UMT5EncoderModel` already declares tied weights, but loading checkpoints that only carr\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43713",
-    "created_at": "2026-02-03T15:51:38Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44014",
+    "created_at": "2026-02-15T15:17:22Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43713/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43713",
+    "files_url": "https://github.com/huggingface/transformers/pull/44014/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44014",
     "labels": [],
     "merged": false,
-    "number": 43713,
+    "number": 44014,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "More flaky generate tests",
-    "updated_at": "2026-02-06T17:40:10Z"
+    "title": "[UMT5] Ignore tied encoder embedding missing-key warning",
+    "updated_at": "2026-02-16T13:40:21Z"
   },
   {
-    "additions": 56,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. See https://github.com/huggingface/transformers/issues/42832#issuecomment-3840648709 where it was pointed out",
+    "additions": 10,
+    "author": "gabrielfruet",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43712",
-    "created_at": "2026-02-03T15:41:51Z",
-    "deletions": 70,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44013",
+    "created_at": "2026-02-15T13:49:53Z",
+    "deletions": 43,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43712/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43712",
+    "files_url": "https://github.com/huggingface/transformers/pull/44013/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44013",
     "labels": [],
-    "merged": true,
-    "number": 43712,
+    "merged": false,
+    "number": 44013,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Improve typing/explanations for general model properties",
-    "updated_at": "2026-02-04T09:28:31Z"
+    "state": "open",
+    "title": "Ouptut tracing: Standardizing MobileNetv2",
+    "updated_at": "2026-02-15T13:50:59Z"
   },
   {
-    "additions": 21,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR put the `lr_scheduler` initialization after the optimizer is prepared to deal with any modification that might happen after. This is mainly to deal with deepspeed as the optimizer is modified and starting wi\u2026",
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the **swinv2** model to use hook-based output capturing via `@capture_outputs` and `@can_return_tuple` decorators, removing the manual threading of `output_attentions`/`output_hidden_states`/`return_dict`\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43711",
-    "created_at": "2026-02-03T15:14:52Z",
-    "deletions": 17,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43711/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43711",
-    "labels": [],
-    "merged": true,
-    "number": 43711,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "Fix scheduler initialization order",
-    "updated_at": "2026-02-04T16:08:18Z"
-  },
-  {
-    "additions": 130,
-    "author": "NielsRogge",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR updates the docs of [GLM-OCR](https://huggingface.co/zai-org/GLM-OCR) and fixes a link for the docs of EomT-DINOv3. Follow-up of https://github.com/huggingface/transformers/pull/43391.",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43710",
-    "created_at": "2026-02-03T15:07:07Z",
-    "deletions": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44012",
+    "created_at": "2026-02-15T11:20:17Z",
+    "deletions": 159,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43710/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43710",
+    "files_url": "https://github.com/huggingface/transformers/pull/44012/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44012",
     "labels": [],
-    "merged": true,
-    "number": 43710,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 44012,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[Docs] Add docs for GLM-OCR and fix EomT-DINOv3",
-    "updated_at": "2026-02-10T15:35:23Z"
+    "title": "Refactor output tracing for swinv2 model",
+    "updated_at": "2026-02-17T14:15:19Z"
   },
   {
-    "additions": 0,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I had this typing failure ``` $ ty check src/transformers/utils/import_utils.py [snip] error[call-non-callable]: Object of type `VersionComparison` is not callable --> src/transformers/utils/import_utils.py:2317:16\u2026",
-    "changed_files": 0,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "additions": 79,
+    "author": "ManasVardhan",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Refactors the `swin` model to use the standardized output collection interface (`@capture_outputs` and `@can_return_tuple` decorators), as described in #43979. ### Changes **SwinPreTrainedModel:** - Added `_can_rec\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43709",
-    "created_at": "2026-02-03T14:26:58Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44011",
+    "created_at": "2026-02-15T11:11:02Z",
+    "deletions": 146,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43709/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43709",
+    "files_url": "https://github.com/huggingface/transformers/pull/44011/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44011",
     "labels": [],
-    "merged": true,
-    "number": 43709,
+    "merged": false,
+    "number": 44011,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: `VersionComparison.from_string` return type mismatch",
-    "updated_at": "2026-02-23T19:05:33Z"
+    "title": "Refactor Swin output tracing with @capture_outputs and @can_return_tuple",
+    "updated_at": "2026-02-17T14:15:17Z"
   },
   {
-    "additions": 2202,
-    "author": "liu-jiaxuan",
+    "additions": 41,
+    "author": "preetam1407",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
-    "changed_files": 16,
-    "cluster_id": "cluster-43098-11",
+    "body_excerpt": "#43979. Refactors SqueezeBert to the standardized output collection interface: - Adds `_can_record_outputs` in `SqueezeBertPreTrainedModel` - Adds `@capture_outputs` on `SqueezeBertModel.forward` - Adds `@can_return_tuple` on task model fo\u2026",
+    "changed_files": 1,
+    "cluster_id": "cluster-43979-28",
     "cluster_ids": [
-      "cluster-43098-11"
+      "cluster-43979-28"
     ],
     "cluster_role": "member",
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43707",
-    "created_at": "2026-02-03T13:33:41Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44010",
+    "created_at": "2026-02-15T09:40:09Z",
+    "deletions": 139,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43707/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43707",
-    "labels": [
-      "New model"
-    ],
-    "merged": true,
-    "number": 43707,
-    "review_comments_count": 145,
-    "state": "closed",
-    "title": "[Model] Add SLANeXt Model Support",
-    "updated_at": "2026-03-20T17:24:22Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/44010/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44010",
+    "labels": [],
+    "merged": false,
+    "number": 44010,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "[SqueezeBert] Migrate to standardized output collection decorators",
+    "updated_at": "2026-03-02T13:04:52Z"
   },
   {
-    "additions": 42,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, the new way to call the attention interface has slipped through a refactor because it's too new and not too well known atm cc @yonigozlan",
-    "changed_files": 9,
+    "additions": 1,
+    "author": "mariam851",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Fixes #43976 Updated the documentation to reflect the actual Python requirement (3.10+) as defined in setup.py. Changes: Updated README.md .",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43706",
-    "created_at": "2026-02-03T11:57:22Z",
-    "deletions": 48,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44009",
+    "created_at": "2026-02-15T08:51:26Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43706/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43706",
+    "files_url": "https://github.com/huggingface/transformers/pull/44009/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44009",
     "labels": [],
     "merged": true,
-    "number": 43706,
-    "review_comments_count": 2,
+    "number": 44009,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Attn`] Fixup interface usage after refactor",
-    "updated_at": "2026-02-03T14:56:35Z"
+    "title": "update python requirement to 3.10+ to match codebase",
+    "updated_at": "2026-02-16T13:46:56Z"
   },
   {
-    "additions": 120,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Allow the `is_causal` kwarg and config attribute to make well-behaved decoder-only models act as encoders",
+    "additions": 26,
+    "author": "pdwi2020",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## Summary - refactor `ResNetModel` to use `@capture_outputs` for hidden-state collection - register `_can_record_outputs` on `ResNetPreTrainedModel` with `ResNetStage` - switch `ResNetForImageClassification` and `ResNetBackbone` to `@can_\u2026",
     "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43705",
-    "created_at": "2026-02-03T11:45:43Z",
-    "deletions": 0,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44007",
+    "created_at": "2026-02-15T07:26:52Z",
+    "deletions": 58,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43705/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43705",
+    "files_url": "https://github.com/huggingface/transformers/pull/44007/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44007",
     "labels": [],
-    "merged": true,
-    "number": 43705,
-    "review_comments_count": 11,
-    "state": "closed",
-    "title": "Allow bi-directional attention for all models",
-    "updated_at": "2026-02-04T17:24:32Z"
+    "merged": false,
+    "number": 44007,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[ResNet] Refactor output tracing to decorator-based interface",
+    "updated_at": "2026-02-19T15:49:49Z"
   },
   {
-    "additions": 1,
-    "author": "francesco-bertolotti",
+    "additions": 8,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "wrong `rms_norm_type` # What does this PR do? Small type error in the configuration of qwen3. `rms_norm_eps` should be a float and not an int. ## Before submitting - [ X] This PR fixes a typo or improves the docs (you can dismiss the other\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR uses torch.xlogy for better numerical handling.",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43703",
-    "created_at": "2026-02-03T10:05:17Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44006",
+    "created_at": "2026-02-15T04:07:50Z",
+    "deletions": 8,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43703/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43703",
+    "files_url": "https://github.com/huggingface/transformers/pull/44006/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44006",
     "labels": [],
     "merged": true,
-    "number": 43703,
+    "number": 44006,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update configuration_qwen3.py",
-    "updated_at": "2026-02-04T07:03:04Z"
+    "title": "Use torch.xlogy ",
+    "updated_at": "2026-02-17T00:42:54Z"
   },
   {
-    "additions": 2828,
-    "author": "eustlb",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds[ UsefulSensors'](https://huggingface.co/UsefulSensors) new ASR model.",
-    "changed_files": 19,
+    "additions": 224,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR transfers grid_thw to a python list at the beginning of some functions to reduce later CUDA sync calls. Therefore, several sync calls are merged into one call.",
+    "changed_files": 16,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43702",
-    "created_at": "2026-02-03T09:32:42Z",
-    "deletions": 247,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44005",
+    "created_at": "2026-02-15T02:34:55Z",
+    "deletions": 254,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43702/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43702",
-    "labels": [
-      "New model"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/44005/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44005",
+    "labels": [],
     "merged": true,
-    "number": 43702,
-    "review_comments_count": 30,
+    "number": 44005,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add moonshine streaming",
-    "updated_at": "2026-02-12T10:10:16Z"
+    "title": "Reduce reduce CUDA sync",
+    "updated_at": "2026-02-17T01:00:52Z"
   },
   {
-    "additions": 1,
-    "author": "YangKai0616",
+    "additions": 21,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Here pytorch has a mature mechanism to auto select the right backend for different devices. @ydshieh pls help review, thx!",
+    "body_excerpt": "This PR refactors the `codegen` model as per #43979 cc @molbap <img width=\"843\" height=\"445\" alt=\"Screenshot 2026-02-15 at 5 24 52 AM\" src=\"https://github.com/user-attachments/assets/d5aeb711-96a7-4fd8-af7b-0aeac23eeeb1\" /> 2 tests are bei\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43699",
-    "created_at": "2026-02-03T07:33:04Z",
-    "deletions": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44004",
+    "created_at": "2026-02-14T23:56:18Z",
+    "deletions": 62,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43699/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43699",
+    "files_url": "https://github.com/huggingface/transformers/pull/44004/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44004",
     "labels": [],
     "merged": false,
-    "number": 43699,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "avoid using specified backend for tp tests",
-    "updated_at": "2026-03-09T08:17:48Z"
+    "number": 44004,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "refactor output tracing for `codegen`",
+    "updated_at": "2026-02-17T08:56:07Z"
   },
   {
-    "additions": 1,
-    "author": "sywangyi",
+    "additions": 37,
+    "author": "omkar-334",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- model loading (from pretrained, etc): @CyrilVallez - distributed: @3outeille @ArthurZucker fix tp crash. crash stack is [rank0]: Traceback (most recent call last): [rank0]: File \"/transformers/benchmark_v2/test_tp.py\", line 29, in <modul\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This PR refactors the `mamba` model as per #43979 cc @molbap <img width=\"859\" height=\"427\" alt=\"Screenshot 2026-02-15 at 5 12 43 AM\" src=\"https://github.com/user-attachments/assets/f23bb675-a9a3-4e21-a6c5-9804910301b4\" /> Note - Only 46 te\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43695",
-    "created_at": "2026-02-03T01:30:55Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44003",
+    "created_at": "2026-02-14T23:46:10Z",
+    "deletions": 68,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43695/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43695",
+    "files_url": "https://github.com/huggingface/transformers/pull/44003/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44003",
     "labels": [],
-    "merged": true,
-    "number": 43695,
+    "merged": false,
+    "number": 44003,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix gptoss tp crash",
-    "updated_at": "2026-02-03T10:20:30Z"
+    "state": "open",
+    "title": "refactor output tracing in `mamba`",
+    "updated_at": "2026-02-17T07:40:50Z"
   },
   {
-    "additions": 1,
-    "author": "stevhliu",
-    "author_association": "MEMBER",
-    "body_excerpt": "updates link to benchmark's new location",
+    "additions": 7,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `upernet` model as per #43979 cc @molbap <img width=\"856\" height=\"457\" alt=\"Screenshot 2026-02-15 at 4 51 03 AM\" src=\"https://github.com/user-attachments/assets/5dc478d7-d708-4296-a86b-c3bb252d0325\" />",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43694",
-    "created_at": "2026-02-03T01:21:15Z",
-    "deletions": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44002",
+    "created_at": "2026-02-14T23:21:45Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43694/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43694",
+    "files_url": "https://github.com/huggingface/transformers/pull/44002/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44002",
     "labels": [],
-    "merged": true,
-    "number": 43694,
+    "merged": false,
+    "number": 44002,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[docs] benchmarks",
-    "updated_at": "2026-02-03T17:00:13Z"
+    "state": "open",
+    "title": "refactor output tracing in `upernet`",
+    "updated_at": "2026-02-17T08:55:16Z"
   },
   {
-    "additions": 1,
-    "author": "WilliamRoyNelson",
-    "author_association": "NONE",
-    "body_excerpt": "# Update doc preprocessing regex to prevent ReDoS The regular expression for capturing docstrings is vulnerable to a [ReDoS attack](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS) The previous change d\u2026",
+    "additions": 3,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the`univnet` model as per #43979 cc @molbap <img width=\"848\" height=\"462\" alt=\"Screenshot 2026-02-15 at 4 19 00 AM\" src=\"https://github.com/user-attachments/assets/75848429-b9ff-49b3-a028-645aa67fc2ad\" />",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43693",
-    "created_at": "2026-02-03T01:06:06Z",
-    "deletions": 1,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44001",
+    "created_at": "2026-02-14T22:50:39Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43693/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43693",
+    "files_url": "https://github.com/huggingface/transformers/pull/44001/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44001",
     "labels": [],
     "merged": false,
-    "number": 43693,
+    "number": 44001,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Update doc preprocessing regex to prevent ReDoS",
-    "updated_at": "2026-02-03T17:23:59Z"
+    "state": "open",
+    "title": "refactor output tracing in `univnet`",
+    "updated_at": "2026-02-14T23:22:13Z"
   },
   {
-    "additions": 13,
-    "author": "qgallouedec",
-    "author_association": "MEMBER",
-    "body_excerpt": "## Summary On PyTorch 2.10+, `Trainer.train()` crashes at the first `lr_scheduler.step()` when using DeepSpeed ZeRO-3 with a PEFT model. This PR provides fix, alothough I'm sure it's not the ideal one. The failure only appears with torch 2\u2026",
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `vision_text_dual_encoder` model issue as per #43979 cc @molbap <img width=\"876\" height=\"292\" alt=\"Screenshot 2026-02-15 at 4 09 07 AM\" src=\"https://github.com/user-attachments/assets/11147a56-993b-4abc-b07a-ec739a53d\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43692",
-    "created_at": "2026-02-02T21:42:11Z",
-    "deletions": 0,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/44000",
+    "created_at": "2026-02-14T22:44:14Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43692/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43692",
+    "files_url": "https://github.com/huggingface/transformers/pull/44000/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/44000",
     "labels": [],
     "merged": false,
-    "number": 43692,
+    "number": 44000,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix DeepSpeed ZeRO-3 + PEFT + Trainer scheduler mismatch on PyTorch 2.10+",
-    "updated_at": "2026-02-03T15:40:19Z"
-  },
-  {
-    "additions": 109,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. All those important static properties (`_keep_in_fp32_modules`, `_no_split_modules`, etc) need to be determined automatically in the case of composite models. This follows the same approach I previ\u2026",
-    "changed_files": 10,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43691",
-    "created_at": "2026-02-02T18:17:44Z",
-    "deletions": 121,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43691/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43691",
-    "labels": [],
-    "merged": true,
-    "number": 43691,
-    "review_comments_count": 10,
-    "state": "closed",
-    "title": "Composite model inherit automatically all important properties from their children",
-    "updated_at": "2026-02-03T16:57:41Z"
+    "state": "open",
+    "title": "refactor output tracing in `vision_text_dual_encoder`",
+    "updated_at": "2026-02-17T07:46:33Z"
   },
   {
     "additions": 10,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/43645. It's quite unusual, but no reason it should crash in general",
-    "changed_files": 1,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `mobilenet_v1` model as per #43979 cc @molbap",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43690",
-    "created_at": "2026-02-02T17:14:50Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43999",
+    "created_at": "2026-02-14T22:20:19Z",
+    "deletions": 30,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43690/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43690",
+    "files_url": "https://github.com/huggingface/transformers/pull/43999/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43999",
     "labels": [],
-    "merged": true,
-    "number": 43690,
+    "merged": false,
+    "number": 43999,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix crash of custom models in Notebook or Repl",
-    "updated_at": "2026-02-03T13:20:20Z"
+    "state": "open",
+    "title": "refactor output tracing in `mobilenet_v1`",
+    "updated_at": "2026-02-17T07:52:08Z"
   },
   {
-    "additions": 1,
-    "author": "itazap",
-    "author_association": "MEMBER",
-    "body_excerpt": "`additional_special_tokens_ids`: Removed. Use `extra_special_tokens_ids` instead",
+    "additions": 8,
+    "author": "omkar-334",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "This PR refactors the `timm_backbone` model issue as per #43979 cc @molbap <img width=\"856\" height=\"423\" alt=\"Screenshot 2026-02-15 at 4 10 15 AM\" src=\"https://github.com/user-attachments/assets/26237c3e-7b66-4f0d-a8b5-ffad6ee7c673\" />",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
+    "cluster_id": "cluster-43998-11",
+    "cluster_ids": [
+      "cluster-43998-11"
+    ],
+    "cluster_role": "member",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43689",
-    "created_at": "2026-02-02T16:18:36Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43998",
+    "created_at": "2026-02-14T22:12:30Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43689/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43689",
+    "files_url": "https://github.com/huggingface/transformers/pull/43998/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43998",
     "labels": [],
-    "merged": true,
-    "number": 43689,
+    "merged": false,
+    "number": 43998,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "update guide with new attr name for toks",
-    "updated_at": "2026-02-02T21:04:22Z"
-  },
-  {
-    "additions": 164,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? - adds Bandit's S110 that makes sure we don't have a dry `Except` - fixes all occurrences - mark a couple of spots where we could tighten the `Exception` catch all I focused on making changes under `src/transformers\u2026",
-    "changed_files": 18,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43687",
-    "created_at": "2026-02-02T15:29:48Z",
-    "deletions": 150,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43687/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43687",
-    "labels": [],
-    "merged": true,
-    "number": 43687,
-    "review_comments_count": 37,
-    "state": "closed",
-    "title": "Added S110 - try-except-pass rule",
-    "updated_at": "2026-02-03T21:20:36Z"
+    "state": "open",
+    "title": "refactor output tracing in `timm_backbone`",
+    "updated_at": "2026-02-21T07:29:47Z"
   },
   {
-    "additions": 1,
-    "author": "jianchang512",
-    "author_association": "NONE",
-    "body_excerpt": "Tokenization should be performed on the source language, i.e., `fi_text`. # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes\u2026",
+    "additions": 12,
+    "author": "karthiksuki",
+    "author_association": "FIRST_TIMER",
+    "body_excerpt": "# What does this PR do? This PR migrates the **RegNet** model to the standardized output collection interface as part of the ongoing refactoring effort in issue #43979. Specifically: - Adds the `_can_record_outputs` dictionary to `RegNetPr\u2026",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43686",
-    "created_at": "2026-02-02T15:14:36Z",
-    "deletions": 1,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43997",
+    "created_at": "2026-02-14T19:57:54Z",
+    "deletions": 45,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43686/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43686",
+    "files_url": "https://github.com/huggingface/transformers/pull/43997/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43997",
     "labels": [],
     "merged": false,
-    "number": 43686,
+    "number": 43997,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix variable referencing error during word segmentation",
-    "updated_at": "2026-02-02T15:16:51Z"
+    "state": "open",
+    "title": "Migrate RegNet to standardized output tracing",
+    "updated_at": "2026-02-14T20:10:22Z"
   },
   {
-    "additions": 2645,
-    "author": "cmakinet",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "# What does this PR do? Adds a comprehensive Hugging Face Transformers Skill following the Agent Skills Open Standard as requested in issue #42971. Fixes # (issue) 1. Created a complete Hugging Face Transformers Skill package in `skills/hu\u2026",
-    "changed_files": 14,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43685",
-    "created_at": "2026-02-02T14:48:35Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43685/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43685",
-    "labels": [
-      "Code agent slop"
+    "additions": 44,
+    "author": "beelapranay",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors FNet and CVT output tracing to use the standardized decorators where appropriate. 1. FNet now uses @capture_outputs with _can_record_outputs to collect hidden states. 2. CVT keeps manual hidden-state colle\u2026",
+    "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
     ],
+    "cluster_role": "member",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43996",
+    "created_at": "2026-02-14T17:55:31Z",
+    "deletions": 134,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43996/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43996",
+    "labels": [],
     "merged": false,
-    "number": 43685,
+    "number": 43996,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "feat: Add Hugging Face Transformers Skill (closes #42971)",
-    "updated_at": "2026-02-03T14:18:13Z"
-  },
-  {
-    "additions": 112,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. https://github.com/huggingface/transformers/pull/41580 broke the `keep_in_fp32_modules` flag as it's supposed to be used only with fp16, not bf16. I added very strict tests on this to avoid name cl\u2026",
-    "changed_files": 5,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43683",
-    "created_at": "2026-02-02T14:14:36Z",
-    "deletions": 26,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43683/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43683",
-    "labels": [
-      "for patch"
-    ],
-    "merged": true,
-    "number": 43683,
-    "review_comments_count": 7,
-    "state": "closed",
-    "title": "[loading] Fix forced upcasting to fp32",
-    "updated_at": "2026-02-02T15:41:47Z"
+    "state": "open",
+    "title": "Refactor FNet and CVT output tracing",
+    "updated_at": "2026-02-14T18:10:17Z"
   },
   {
-    "additions": 34,
-    "author": "githubnemo",
-    "author_association": "MEMBER",
-    "body_excerpt": "PR #41541 refactored `tie_word_embeddings` handling (among other things) which subtly broke detection of T5 v1.1 vs. original detection. As a consequence, decoder output scaling was always applied, regardless of T5 version. This is resolve\u2026",
-    "changed_files": 3,
+    "additions": 21,
+    "author": "akeemlh",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors falcon in partial fulfillment of https://github.com/huggingface/transformers/issues/43979 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's th\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43681",
-    "created_at": "2026-02-02T13:12:00Z",
-    "deletions": 34,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43995",
+    "created_at": "2026-02-14T14:39:58Z",
+    "deletions": 87,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43681/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43681",
+    "files_url": "https://github.com/huggingface/transformers/pull/43995/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43995",
     "labels": [],
-    "merged": true,
-    "number": 43681,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Fix T5 v1.1 detection",
-    "updated_at": "2026-02-05T11:02:03Z"
+    "merged": false,
+    "number": 43995,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "Refactoring falcon model to match standardized output collection interface",
+    "updated_at": "2026-02-14T14:41:00Z"
   },
   {
-    "additions": 151,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? I also want to see if linear cache thing can be squeezed in this PR. If it requires big diffs, I'll split into two Fixes https://github.com/huggingface/transformers/issues/43673 Sidenote: kinda breaking but in a goo\u2026",
-    "changed_files": 12,
+    "additions": 12,
+    "author": "saurav0369",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### Docs: Fix Typos and Standardize Naming This PR fixes various typos, duplicate words, and capitalization inconsistencies across the documentation to improve readability and ensure professional branding. | File | Changes Made | | :--- |\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43679",
-    "created_at": "2026-02-02T11:29:06Z",
-    "deletions": 112,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43993",
+    "created_at": "2026-02-14T10:11:40Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43679/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43679",
+    "files_url": "https://github.com/huggingface/transformers/pull/43993/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43993",
     "labels": [],
     "merged": true,
-    "number": 43679,
-    "review_comments_count": 22,
+    "number": 43993,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "\ud83d\udea8 Generation cache preparation",
-    "updated_at": "2026-02-04T13:22:50Z"
+    "title": "docs: fix typos across documentation files",
+    "updated_at": "2026-02-16T13:41:41Z"
   },
   {
-    "additions": 10,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "In this PR, we do several things for llava_onevision model: 1. skip torch_exportable tests as it does not support it 2. unify expected output for cuda and xpu 3. add `image_sizes` param in `flash_attn_inference_equivalence` func to support\u2026",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "taovinci0",
+    "author_association": "NONE",
+    "body_excerpt": "Replaces mutable default dict `weights={}` with `weights=None` and initializes inside the function. The dict is mutated via `weights[full_key] = w`, which can cause unexpected behavior across multiple calls.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43678",
-    "created_at": "2026-02-02T10:31:21Z",
-    "deletions": 11,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43991",
+    "created_at": "2026-02-14T00:00:00Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43678/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43678",
+    "files_url": "https://github.com/huggingface/transformers/pull/43991/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43991",
     "labels": [],
-    "merged": true,
-    "number": 43678,
-    "review_comments_count": 10,
+    "merged": false,
+    "number": 43991,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Llava onevision: output align for tests and add `image_sizes` input param",
-    "updated_at": "2026-02-03T14:30:36Z"
+    "title": "fix: replace mutable default argument in _read_h5_weights",
+    "updated_at": "2026-02-16T11:18:06Z"
   },
   {
-    "additions": 7,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "additions": 10,
+    "author": "Abhijeetsingh610",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a crash in `AutoVideoProcessor` when `torchvision` is unavailable. `VIDEO_PROCESSOR_MAPPING_NAMES` can contain `None`, and `video_processor_class_from_name` was doing `if class_name in extractors`, which rais\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43677",
-    "created_at": "2026-02-02T10:31:05Z",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43989",
+    "created_at": "2026-02-13T20:48:03Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43677/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43677",
+    "files_url": "https://github.com/huggingface/transformers/pull/43989/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43989",
     "labels": [],
     "merged": false,
-    "number": 43677,
+    "number": 43989,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "one possible fix issue #43676",
-    "updated_at": "2026-02-20T08:43:59Z"
+    "state": "open",
+    "title": "Fix AutoVideoProcessor class lookup when torchvision is unavailable",
+    "updated_at": "2026-02-18T17:52:34Z"
   },
   {
-    "additions": 20,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? in `PreTrainedTokenizerBase.from_pretrained` this commit https://github.com/huggingface/transformers/commit/73a13f86f6d208882d59d1200609986c5a5f49a7#diff-85b29486a884f445b1014[\u2026]f4ae701ee758a754fddcc1L1679 silenced\u2026",
+    "additions": 7,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **LayoutXLM:** [This PR (rm slow tokenizers)](https://github.com/huggingface/transformers/pull/40936) changed [models/auto/tokenization_auto.py](\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43675",
-    "created_at": "2026-02-02T09:52:19Z",
-    "deletions": 24,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43988",
+    "created_at": "2026-02-13T20:03:28Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43675/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43675",
-    "labels": [
-      "bug"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43988/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43988",
+    "labels": [],
     "merged": true,
-    "number": 43675,
+    "number": 43988,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "make sure hub errors are surfaced",
-    "updated_at": "2026-02-02T12:41:11Z"
+    "title": "fix(testing): Fix LayoutXLM tokenization test and LightOnOCR SDPA flash test failures on main CI",
+    "updated_at": "2026-02-23T14:07:59Z"
   },
   {
-    "additions": 30,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43262 Supersedes https://github.com/huggingface/transformers/pull/43273 and https://github.com/huggingface/transformers/pull/43660 I don't think the prev PR w\u2026",
+    "additions": 47,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? Accelerator has a lot of other args that can be passed to it like fp8 support, etc, but requires extensive monkey patching downstream to make it work. This makes it easier to extend the accelerator args building met\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43674",
-    "created_at": "2026-02-02T09:32:00Z",
-    "deletions": 42,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43987",
+    "created_at": "2026-02-13T18:51:56Z",
+    "deletions": 38,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43674/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43674",
+    "files_url": "https://github.com/huggingface/transformers/pull/43987/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43987",
     "labels": [],
     "merged": true,
-    "number": 43674,
+    "number": 43987,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "Use correct sampling rate in chat template",
-    "updated_at": "2026-02-02T11:32:37Z"
+    "title": "split out accelerator args builder method",
+    "updated_at": "2026-02-16T14:59:03Z"
   },
   {
-    "additions": 383,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? According to PR https://github.com/huggingface/transformers/pull/43030, update test files `tests/models/pe_audio/test_modeling_pe_audio.py` and `tests/models/pe_video/test_modeling_pe_video.py`. The changes to `src/\u2026",
-    "changed_files": 4,
+    "additions": 1828,
+    "author": "winglian",
+    "author_association": "COLLABORATOR",
+    "body_excerpt": "# What does this PR do? The `_inner_training_loop` method has a lot going on which makes it hard to extend for downstream developers/libraries. This PR breaks it up into smaller well described methods that are chained in the training loop.\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43672",
-    "created_at": "2026-02-02T04:10:06Z",
-    "deletions": 2,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43985",
+    "created_at": "2026-02-13T17:55:01Z",
+    "deletions": 251,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43672/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43672",
+    "files_url": "https://github.com/huggingface/transformers/pull/43985/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43985",
     "labels": [],
     "merged": false,
-    "number": 43672,
+    "number": 43985,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix pe uts",
-    "updated_at": "2026-02-05T11:03:54Z"
+    "title": "Refactor inner training loop",
+    "updated_at": "2026-03-09T19:57:50Z"
   },
   {
-    "additions": 1,
-    "author": "yiliu30",
+    "additions": 2,
+    "author": "materight",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Signed-off-by: yiliu30 <yi4.liu@intel.com> - Qwen/Qwen3-30B-A3B-Instruct-2507-FP8 ```bash input = { \"messages\": [ { \"role\": \"user\", \"content\": [ { \"type\": \"text\", \"text\": \"The history of France is \", } ], }, ], } I have a question about th\u2026",
+    "body_excerpt": "# What does this PR do? Removes unused `.squeeze` from VJEPA2 embeddings rotation. Currently the squeeze does nothing on video input since torch skips it if the dimension is not 1. Exporting to onnx and compiling to TensorRT instead fails\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43670",
-    "created_at": "2026-02-02T02:06:14Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43984",
+    "created_at": "2026-02-13T17:53:16Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43670/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43670",
+    "files_url": "https://github.com/huggingface/transformers/pull/43984/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43984",
     "labels": [],
     "merged": true,
-    "number": 43670,
+    "number": 43984,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix FP8Expert for Qwen",
-    "updated_at": "2026-02-02T15:18:49Z"
+    "title": "Remove unused squeeze from VJEPA2 embeddings rotation",
+    "updated_at": "2026-02-13T21:56:01Z"
   },
   {
-    "additions": 2,
-    "author": "fschlatt",
+    "additions": 62,
+    "author": "Aki-07",
     "author_association": "CONTRIBUTOR",
     "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 2,
+    "cluster_id": "cluster-43979-28",
+    "cluster_ids": [
+      "cluster-43979-28"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43983",
+    "created_at": "2026-02-13T17:52:45Z",
+    "deletions": 188,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43983/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43983",
+    "labels": [],
+    "merged": true,
+    "number": 43983,
+    "review_comments_count": 4,
+    "state": "closed",
+    "title": "Migrate GPT2 to standardized output capture decorators",
+    "updated_at": "2026-02-18T10:40:51Z"
+  },
+  {
+    "additions": 1,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR create a `.git-blame-ignore-revs` file to ignore the following commit https://github.com/huggingface/transformers/pull/43914 when using git blame.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43669",
-    "created_at": "2026-02-01T09:47:44Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43982",
+    "created_at": "2026-02-13T17:13:41Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43669/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43669",
+    "files_url": "https://github.com/huggingface/transformers/pull/43982/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43982",
     "labels": [],
     "merged": true,
-    "number": 43669,
+    "number": 43982,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix norm_eps dtype",
-    "updated_at": "2026-02-03T14:34:00Z"
+    "title": "create .git-blame-ignore-revs file ",
+    "updated_at": "2026-02-16T13:08:22Z"
   },
   {
-    "additions": 2,
-    "author": "CodeByKodi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- index.md: \"multimodal model\" \u2192 \"multimodal models\" - quicktour.md: \"Pytorch\" \u2192 \"PyTorch\" # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in\u2026",
-    "changed_files": 2,
+    "additions": 5,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Some of our image processors have a fast return for images that are already square. However, this fast return skips the `background_color` check, which causes flaky test failures because the `test_padding` test uses `self.assertRaises()` t\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43666",
-    "created_at": "2026-02-01T03:24:56Z",
-    "deletions": 2,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43981",
+    "created_at": "2026-02-13T17:01:51Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43666/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43666",
+    "files_url": "https://github.com/huggingface/transformers/pull/43981/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43981",
     "labels": [],
     "merged": true,
-    "number": 43666,
-    "review_comments_count": 0,
+    "number": 43981,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Docs: fix typos in Get started (index, quicktour)",
-    "updated_at": "2026-02-02T17:47:53Z"
+    "title": "Fix early image processor return not raising error",
+    "updated_at": "2026-02-16T16:40:41Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh2",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "albertvillanova",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Escape `%` in `help` for `ArgumentParser.add_argument` to fix TypeError: > TypeError: not enough arguments for format string Context: https://docs.python.org/3/library/argparse.html#help > As the help string support\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43665",
-    "created_at": "2026-01-31T21:50:43Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43980",
+    "created_at": "2026-02-13T15:43:52Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43665/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43665",
+    "files_url": "https://github.com/huggingface/transformers/pull/43980/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43980",
     "labels": [],
     "merged": false,
-    "number": 43665,
+    "number": 43980,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix",
-    "updated_at": "2026-03-24T14:42:35Z"
+    "state": "closed",
+    "title": "Escape % in help for ArgumentParser.add_argument to fix TypeError",
+    "updated_at": "2026-02-17T17:30:07Z"
   },
   {
-    "additions": 1256,
-    "author": "jonathan-fulton",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? This PR introduces a declarative pipeline composition system that enables chaining multiple HuggingFace pipelines into unified workflows. It provides a clean, Pythonic API for building multi-model applications with\u2026",
-    "changed_files": 3,
+    "additions": 0,
+    "author": "NicoSimo",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Minor fix, resolves some older references to Python3.9. Fixes #43976 ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the\u2026",
+    "changed_files": 0,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43664",
-    "created_at": "2026-01-31T21:35:02Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43978",
+    "created_at": "2026-02-13T14:40:54Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43664/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43664",
+    "files_url": "https://github.com/huggingface/transformers/pull/43978/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43978",
     "labels": [],
     "merged": false,
-    "number": 43664,
+    "number": 43978,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add pipeline composition DSL for multi-model workflows",
-    "updated_at": "2026-02-03T13:45:11Z"
+    "title": "Update references to Python3.9 to Python3.10. Resolves #43976",
+    "updated_at": "2026-02-13T17:00:07Z"
   },
   {
-    "additions": 13,
-    "author": "abigailtech",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Adds _get_signature_columns method that custom trainers can override to specify which columns to preserve when remove_unused_columns=True. Currently, TRL trainers (DPO, GRPO) must override _set_signature_columns_if_needed entirely just to\u2026",
-    "changed_files": 1,
+    "additions": 48,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43663",
-    "created_at": "2026-01-31T21:34:03Z",
-    "deletions": 12,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43977",
+    "created_at": "2026-02-13T13:18:49Z",
+    "deletions": 23,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43977/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43977",
+    "labels": [],
+    "merged": true,
+    "number": 43977,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "stable grouped_mm API",
+    "updated_at": "2026-02-16T11:09:33Z"
+  },
+  {
+    "additions": 1659,
+    "author": "Abubakar-rashid",
+    "author_association": "NONE",
+    "body_excerpt": "This fixes issue #43957 reported by @xvdp, where models fail to load when using [torch.device('meta')](vscode-file://vscode-app/c:/Users/Priva/AppData/Local/Programs/Microsoft%20VS%20Code/_/resources/app/out/vs/code/electron-browser/workbe\u2026",
+    "changed_files": 28,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43974",
+    "created_at": "2026-02-13T11:27:42Z",
+    "deletions": 381,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43663/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43663",
+    "files_url": "https://github.com/huggingface/transformers/pull/43974/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43974",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43974,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix: Replace torch.linspace().item() with python_linspace() to resolv\u2026",
+    "updated_at": "2026-02-16T13:46:49Z"
+  },
+  {
+    "additions": 3909,
+    "author": "MHRDYN7",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Add support for lfm 2/2.5 audio models. (closes #43909)",
+    "changed_files": 16,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43973",
+    "created_at": "2026-02-13T09:36:59Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43973/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43973",
     "labels": [],
     "merged": false,
-    "number": 43663,
+    "number": 43973,
     "review_comments_count": 0,
     "state": "open",
-    "title": "Add _get_signature_columns method to allow custom trainers to override column filtering",
-    "updated_at": "2026-02-03T13:32:17Z"
+    "title": "Add lfm2.5 audio",
+    "updated_at": "2026-02-21T16:42:21Z"
   },
   {
-    "additions": 7,
-    "author": "ydshieh",
+    "additions": 2219,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The file `utils/process_bad_commit_report.py` tried to get a team member to ping on slack, including someone merged a PR despite they are not the author of that PR. However, that part was written before we have PR c\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Following Ernie, we build 3d positions based on `mm_token_type_ids` and the models will return them by default from `processor`. We have a unified `get_vision_position` in the qwen2-vl model file, all other models j\u2026",
+    "changed_files": 45,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43662",
-    "created_at": "2026-01-31T21:26:22Z",
-    "deletions": 2,
+    "comments_count": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43972",
+    "created_at": "2026-02-13T09:31:44Z",
+    "deletions": 1611,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43662/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43662",
+    "files_url": "https://github.com/huggingface/transformers/pull/43972/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43972",
     "labels": [],
     "merged": true,
-    "number": 43662,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix `process_bad_commit_report.py`: avoid items to appear in `null` author in the report",
-    "updated_at": "2026-01-31T21:46:01Z"
-  },
-  {
-    "additions": 2999,
-    "author": "jonathan-fulton",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## \ud83d\ude80 Feature: Unified Model Introspection & Diagnostics Framework ### What does this PR do? This PR introduces a comprehensive diagnostics framework for profiling and analyzing transformer models during inference. It provides a unified API\u2026",
-    "changed_files": 17,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43661",
-    "created_at": "2026-01-31T20:17:33Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43661/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43661",
-    "labels": [],
-    "merged": false,
-    "number": 43661,
-    "review_comments_count": 0,
+    "number": 43972,
+    "review_comments_count": 17,
     "state": "closed",
-    "title": "feat: Add unified model introspection & diagnostics framework",
-    "updated_at": "2026-02-03T13:45:28Z"
+    "title": ":rotating_light: Unify 3D position ids",
+    "updated_at": "2026-03-05T18:48:30Z"
   },
   {
-    "additions": 71,
-    "author": "jonathan-fulton",
+    "additions": 65,
+    "author": "caffeinism",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43262 ### Problem The `apply_chat_template()` method always defaults to 16kHz sampling rate, even when the processor's feature extractor specifies a different rate: ```python processor = AutoProcessor.from_p\u2026",
+    "body_excerpt": "# What does this PR do? 1. According to the paper, this model is designed to reference 250 contexts (10 seconds), but the current implementation uses DynamicCache without employing create_sliding_window_causal_mask, causing it to reference\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43660",
-    "created_at": "2026-01-31T18:58:29Z",
-    "deletions": 0,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43971",
+    "created_at": "2026-02-13T09:28:32Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43660/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43660",
+    "files_url": "https://github.com/huggingface/transformers/pull/43971/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43971",
     "labels": [
-      "Code agent slop"
+      "Audio"
     ],
-    "merged": false,
-    "number": 43660,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 43971,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix audio processors defaulting to 16kHz in apply_chat_template",
-    "updated_at": "2026-02-02T12:17:40Z"
+    "title": "[Mimi] Calibrate to ensure encoder streaming performs correctly",
+    "updated_at": "2026-02-23T14:20:01Z"
   },
   {
-    "additions": 2,
-    "author": "jonathan-fulton",
+    "additions": 542,
+    "author": "jackcook",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43519 ### Problem The Qwen3VL processor was incorrectly using `merge_size` (spatial merging) instead of `temporal_patch_size` (temporal grouping) when calculating video frame timestamps. ```python # Before (\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? This PR adds support for quantization with [Four Over Six (4/6)](https://github.com/mit-han-lab/fouroversix). Our library currently focuses on quantizing linear layers to NVFP4, including weight, activation, and gra\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43659",
-    "created_at": "2026-01-31T18:57:21Z",
-    "deletions": 2,
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43970",
+    "created_at": "2026-02-13T05:15:44Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43659/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43659",
+    "files_url": "https://github.com/huggingface/transformers/pull/43970/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43970",
     "labels": [],
     "merged": true,
-    "number": 43659,
-    "review_comments_count": 0,
+    "number": 43970,
+    "review_comments_count": 28,
     "state": "closed",
-    "title": "Fix incorrect timestamp calculation in Qwen3VL Processor",
-    "updated_at": "2026-02-09T09:54:09Z"
+    "title": "Add Four Over Six quantization integration",
+    "updated_at": "2026-02-25T09:30:09Z"
   },
   {
-    "additions": 3,
-    "author": "jonathan-fulton",
+    "additions": 6,
+    "author": "jp1924",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43335 ### Problem When configuring a SwitchTransformers model with `num_sparse_encoder_layers=0` (intending to have zero sparse layers), a sparse layer is still incorrectly created when `num_layers=1`: ```py\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43658",
-    "created_at": "2026-01-31T18:56:26Z",
-    "deletions": 3,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43969",
+    "created_at": "2026-02-13T03:47:24Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43658/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43658",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43969/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43969",
+    "labels": [],
     "merged": false,
-    "number": 43658,
-    "review_comments_count": 0,
+    "number": 43969,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix SwitchTransformers creating sparse layer when num_sparse_*_layers=0",
-    "updated_at": "2026-02-03T15:31:56Z"
+    "title": "Fix for 2D weight error in embedding layer with ZeRO3",
+    "updated_at": "2026-02-20T07:32:14Z"
   },
   {
-    "additions": 6,
-    "author": "jonathan-fulton",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43618 ### Problem Since v5, `CLIPVisionModel` and `CLIPTextModel` no longer return attention weights when `output_attentions=True`: ```python model = CLIPModel.from_pretrained(model_path, attn_implementation\u2026",
-    "changed_files": 2,
+    "additions": 90,
+    "author": "qgallouedec",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Enhances `get_json_schema()` and `render_jinja_template()` to support instance methods, class methods, and static methods, not just plain functions. Previously, `get_json_schema()` only worked with standalone functi\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43657",
-    "created_at": "2026-01-31T18:55:33Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43968",
+    "created_at": "2026-02-13T01:43:51Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43657/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43657",
+    "files_url": "https://github.com/huggingface/transformers/pull/43968/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43968",
     "labels": [],
     "merged": true,
-    "number": 43657,
-    "review_comments_count": 0,
+    "number": 43968,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix CLIPOutput attentions not being returned",
-    "updated_at": "2026-02-03T13:51:22Z"
+    "title": "Enhance JSON schema generation to support instance, static, and class methods",
+    "updated_at": "2026-02-13T18:01:56Z"
   },
   {
-    "additions": 9,
-    "author": "jonathan-fulton",
+    "additions": 3,
+    "author": "shtse8",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43576 The `transformers env` command was failing with: ``` NameError: name 'TypeAdapter' is not defined ``` ### Root Cause The `Serve` class in `serve.py` uses `TypeAdapter` (from pydantic) as a type annotat\u2026",
+    "body_excerpt": "## What does this PR do? Fixes the `AttributeError: 'List' object has no attribute 'dtype'` crash in `run_classification.py` when loading JSON data with list-type labels for multi-label classification (reported in #43116). ### Problem When\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43656",
-    "created_at": "2026-01-31T18:54:19Z",
-    "deletions": 9,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43967",
+    "created_at": "2026-02-12T23:42:11Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43656/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43656",
+    "files_url": "https://github.com/huggingface/transformers/pull/43967/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43967",
     "labels": [],
     "merged": false,
-    "number": 43656,
-    "review_comments_count": 1,
+    "number": 43967,
+    "review_comments_count": 0,
     "state": "open",
-    "title": "Fix TypeAdapter NameError in transformers CLI",
-    "updated_at": "2026-03-01T12:00:57Z"
+    "title": "Fix AttributeError in run_classification.py when detecting multi-label data",
+    "updated_at": "2026-02-12T23:42:11Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The PR #43628 changed a bit the format of `new_failures.json` and causes the `check_bad_commit.py` may fail. (\"single-gpu\" is no longer guaranteed to be in the dict)",
+    "additions": 10,
+    "author": "shtse8",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes #43867 When a model has sub-models with different naming conventions (e.g. `model.layers.26.self_attn.o_proj.weight` vs `desc_model.roberta.encoder.layers.7.norm1.weight`), `dot_natural_key` can produce lists\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43655",
-    "created_at": "2026-01-31T18:47:01Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43966",
+    "created_at": "2026-02-12T23:40:45Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43655/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43655",
+    "files_url": "https://github.com/huggingface/transformers/pull/43966/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43966",
     "labels": [],
     "merged": true,
-    "number": 43655,
-    "review_comments_count": 0,
+    "number": 43966,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Fix `KeyError` in `check_bad_commit.py`",
-    "updated_at": "2026-01-31T18:50:59Z"
+    "title": "Fix TypeError in dot_natural_key when state_dict keys have mixed types at same position",
+    "updated_at": "2026-02-13T17:39:52Z"
   },
   {
-    "additions": 27,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 Fixes `test_modeling_big_bird.py::BigBirdModelIntegrationTest::test_fill_mask`. For more details on reproducing the bug, please visit the linked issue! Fixes #43653. ### Before submitting - [ ] This PR fixes a t\u2026",
+    "additions": 77,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "updates `tokenizer_summary.md`: - condense intro and subword tokenization sections since this doc is mostly about BPE/Unigram/WordPiece - removes some redundant and introductory motivation content and links to the course for more info - pl\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43654",
-    "created_at": "2026-01-31T18:31:06Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43965",
+    "created_at": "2026-02-12T22:08:33Z",
+    "deletions": 200,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43654/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43654",
+    "files_url": "https://github.com/huggingface/transformers/pull/43965/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43965",
     "labels": [],
-    "merged": false,
-    "number": 43654,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "fix(tokenizer): Avert special token property overwrites in batch add_tokens calls",
-    "updated_at": "2026-03-26T06:33:38Z"
+    "merged": true,
+    "number": 43965,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "[docs] tokenizer summary",
+    "updated_at": "2026-02-17T18:17:25Z"
   },
   {
-    "additions": 1,
-    "author": "jonathan-fulton",
+    "additions": 72,
+    "author": "tohtana",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes the model/processor mismatch in the SigLIP2 documentation quantization example. Fixes #39692 ## The Problem The quantization example used mismatched model and processor checkpoints: - Model: `google/siglip2-l\u2026",
-    "changed_files": 1,
+    "body_excerpt": "This PR fixes a ZeRO-3 checkpoint loading failure in Transformers\u2019 conversion-mapped loading path. In affected cases, many parameters are reported as missing and are actually not restored from checkpoint (they get reinitialized). `transfor\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43652",
-    "created_at": "2026-01-31T18:17:25Z",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43964",
+    "created_at": "2026-02-12T22:01:48Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43652/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43652",
+    "files_url": "https://github.com/huggingface/transformers/pull/43964/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43964",
     "labels": [],
-    "merged": true,
-    "number": 43652,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 43964,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Fix model/processor mismatch in SigLIP2 quantization example",
-    "updated_at": "2026-02-03T13:38:04Z"
+    "title": "Fix ZeRO-3 conversion-mapped checkpoint loading",
+    "updated_at": "2026-02-13T22:58:54Z"
   },
   {
-    "additions": 13,
-    "author": "abigailtech",
+    "additions": 5,
+    "author": "DimiChatzipavlis",
     "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Added a _loss_is_scaled_for_ga property that custom trainers can override to explicitly control gradient accumulation loss scaling. The default implementation preserves backward compatibility. Custom trainers can now simply override this p\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Refactors usages of `file.readlines()` to more Pythonic equivalents (`list(file)` or direct iteration) in core tokenization and utility files. **Key Improvements:** 1. **Memory Optimization:** Replaced `list(f.readl\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43651",
-    "created_at": "2026-01-31T17:51:42Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43963",
+    "created_at": "2026-02-12T21:16:47Z",
     "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43651/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43651",
+    "files_url": "https://github.com/huggingface/transformers/pull/43963/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43963",
     "labels": [],
     "merged": false,
-    "number": 43651,
+    "number": 43963,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add _loss_is_scaled_for_ga to allow custom trainers to control gradient accumulation loss scaling",
-    "updated_at": "2026-03-13T22:28:27Z"
+    "state": "closed",
+    "title": "Refactor redundant .readlines() with list()",
+    "updated_at": "2026-02-13T12:49:22Z"
   },
   {
-    "additions": 158,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 8,
+    "additions": 59,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Refactors `tests/cli/test_serve.py` to use native Python `assert` statements instead of legacy `unittest.TestCase` assertion methods. This modernization aligns the CLI tests with `pytest` best practices, enabling: -\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 85,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43649",
-    "created_at": "2026-01-31T14:06:38Z",
-    "deletions": 143,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43962",
+    "created_at": "2026-02-12T18:02:43Z",
+    "deletions": 64,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43649/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43649",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/43962/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43962",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43649,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "Check new failures reporting 5",
-    "updated_at": "2026-02-16T07:56:27Z"
-  },
-  {
-    "additions": 26,
-    "author": "ydshieh2",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "number": 43962,
+    "review_comments_count": 2,
+    "state": "closed",
+    "title": "Refactor CLI tests using native pytest assertions",
+    "updated_at": "2026-02-13T12:49:11Z"
+  },
+  {
+    "additions": 12,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a common Python pitfall regarding **mutable default arguments**. In Python, default arguments are evaluated only once at function definition time. If a mutable object (like a `list`) is used as a default, that\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43648",
-    "created_at": "2026-01-31T12:23:05Z",
-    "deletions": 22,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43961",
+    "created_at": "2026-02-12T17:31:19Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43648/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43648",
+    "files_url": "https://github.com/huggingface/transformers/pull/43961/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43961",
     "labels": [],
     "merged": false,
-    "number": 43648,
+    "number": 43961,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "check",
-    "updated_at": "2026-01-31T21:51:06Z"
+    "state": "open",
+    "title": "Replace mutable default arguments with None",
+    "updated_at": "2026-02-13T12:45:04Z"
   },
   {
-    "additions": 22,
-    "author": "ydshieh",
+    "additions": 1194,
+    "author": "remi-or",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
+    "body_excerpt": "# Summary This PR adds the asynchronous batching feature to continuous batching (CB). Asynchronous batching, through the use of more VRAM and CUDA streams and events, greatly reduces the CPU overhead of preparing and updating batches by hi\u2026",
+    "changed_files": 11,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43647",
-    "created_at": "2026-01-31T11:38:27Z",
-    "deletions": 100,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43960",
+    "created_at": "2026-02-12T17:20:38Z",
+    "deletions": 666,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43647/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43647",
+    "files_url": "https://github.com/huggingface/transformers/pull/43960/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43960",
     "labels": [],
-    "merged": false,
-    "number": 43647,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 43960,
+    "review_comments_count": 39,
     "state": "closed",
-    "title": "check",
-    "updated_at": "2026-02-13T02:07:05Z"
+    "title": "[CB] [Major] Asynchronous batching",
+    "updated_at": "2026-02-23T10:11:28Z"
   },
   {
-    "additions": 1,
-    "author": "prachigurav1203",
-    "author_association": "NONE",
-    "body_excerpt": "change # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully\u2026",
-    "changed_files": 1,
+    "additions": 32,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes potential `UnicodeDecodeError` on Windows (and other environments where the default encoding is not UTF-8) by enforcing `encoding=\"utf-8\"` in standard `open()` calls across the core library. ## Modifications A\u2026",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43642",
-    "created_at": "2026-01-31T07:02:11Z",
-    "deletions": 1,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43959",
+    "created_at": "2026-02-12T17:00:15Z",
+    "deletions": 32,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43642/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43642",
+    "files_url": "https://github.com/huggingface/transformers/pull/43959/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43959",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43642,
+    "number": 43959,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve benchmark_config documentation",
-    "updated_at": "2026-02-03T13:26:46Z"
+    "title": "Enforce explicit UTF-8 encoding in core Library to prevent Windows crashes",
+    "updated_at": "2026-02-13T12:50:43Z"
   },
   {
-    "additions": 1,
-    "author": "KOKOSde",
+    "additions": 2,
+    "author": "MekkCyber",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Docs fix for SigLIP2 usage examples. - Use the correct `torch_dtype=` argument in examples (instead of `dtype=`). - Fix the processor model id in the 4-bit example to match the model being loaded. Docs-only change.",
+    "body_excerpt": "# What does this PR do? Update the quantization docker file to add kernels",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43641",
-    "created_at": "2026-01-31T07:00:56Z",
-    "deletions": 1,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43958",
+    "created_at": "2026-02-12T16:31:42Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43641/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43641",
+    "files_url": "https://github.com/huggingface/transformers/pull/43958/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43958",
     "labels": [],
-    "merged": true,
-    "number": 43641,
-    "review_comments_count": 5,
+    "merged": false,
+    "number": 43958,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs: fix SigLIP2 usage examples",
-    "updated_at": "2026-03-06T20:02:56Z"
+    "title": "[kernels] update docker file",
+    "updated_at": "2026-03-25T10:31:24Z"
   },
   {
-    "additions": 45,
-    "author": "KOKOSde",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Detect adapter repositories (via `adapter_config.json`) and raise a clear, actionable error when `peft` is not installed, instead of the generic \"missing pytorch_model.bin/model.safetensors\" message. ## Changes - Check for `adap\u2026",
+    "additions": 8,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Supersedes https://github.com/huggingface/transformers/pull/43913 After scanning the collection [here](https://huggingface.co/collections/Qwen/qwen3-vl), all models already have merged experts but need a transpose",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43640",
-    "created_at": "2026-01-31T07:00:53Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43956",
+    "created_at": "2026-02-12T16:16:08Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43640/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43640",
-    "labels": [
-      "Code agent slop"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43956/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43956",
+    "labels": [],
     "merged": false,
-    "number": 43640,
+    "number": 43956,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve error message for PEFT adapter without peft",
-    "updated_at": "2026-02-03T13:26:19Z"
+    "title": "Fix qwen3-vl-moe weight mapping",
+    "updated_at": "2026-02-12T17:29:22Z"
   },
   {
     "additions": 5,
-    "author": "Justynita",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? The Quickstart \"Open in Colab\" example can fail in a fresh Google Colab environment due to an incompatible preinstalled version of `huggingface_hub`. This PR updates the installation command to explicitly constrain\u2026",
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes kernel versions for new builds with torch 2.10",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43639",
-    "created_at": "2026-01-31T04:47:25Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43955",
+    "created_at": "2026-02-12T16:01:58Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43639/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43639",
+    "files_url": "https://github.com/huggingface/transformers/pull/43955/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43955",
     "labels": [],
-    "merged": false,
-    "number": 43639,
+    "merged": true,
+    "number": 43955,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Quickstart Colab dependency mismatch",
-    "updated_at": "2026-02-03T13:25:12Z"
+    "title": "[kernels] fix kernel versions ",
+    "updated_at": "2026-02-13T07:32:33Z"
   },
   {
-    "additions": 37,
-    "author": "22navyakumar",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 3,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The LASR model uses `layerdrop`, which we forgot to disable in the tests. Since the tests only have 2 hidden layers, and the default layerdrop chance is `0.1`, this means there's a `0.1^2 = 1%` chance in any test that doesn't call `model.e\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43637",
-    "created_at": "2026-01-31T00:12:02Z",
-    "deletions": 8,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43954",
+    "created_at": "2026-02-12T14:45:01Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43637/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43637",
-    "labels": [
-      "Code agent slop"
-    ],
-    "merged": false,
-    "number": 43637,
+    "files_url": "https://github.com/huggingface/transformers/pull/43954/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43954",
+    "labels": [],
+    "merged": true,
+    "number": 43954,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Exercise 1: MX Linear for LlamaAttention",
-    "updated_at": "2026-02-03T12:34:49Z"
+    "title": "Fix LASR test layerdrop issue",
+    "updated_at": "2026-02-12T17:03:42Z"
   },
   {
-    "additions": 11,
-    "author": "abigailtech",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "Adds a private _metrics dict to the Trainer class that allows custom trainers to log metrics without overriding log. Custom trainers can now simply do: `self._metrics[mode][\"my_metric\"].append(value)` And the metrics will be automatically\u2026",
+    "additions": 1,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43931, no transpose needed after standardizing the model impl to inherit from Qwen3-MoE",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43636",
-    "created_at": "2026-01-30T21:53:43Z",
-    "deletions": 0,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43953",
+    "created_at": "2026-02-12T14:43:59Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43636/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43636",
+    "files_url": "https://github.com/huggingface/transformers/pull/43953/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43953",
     "labels": [],
     "merged": false,
-    "number": 43636,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "Add _metrics dict to Trainer for custom metric logging",
-    "updated_at": "2026-03-13T22:22:03Z"
-  },
-  {
-    "additions": 48,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Add explicit commit info to PR comment CI feedback + improve info. contained in the new failures report",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 21,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43635",
-    "created_at": "2026-01-30T20:39:22Z",
-    "deletions": 9,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43635/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43635",
-    "labels": [],
-    "merged": true,
-    "number": 43635,
+    "number": 43953,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add explicit commit info to PR comment CI feedback",
-    "updated_at": "2026-01-31T11:34:48Z"
+    "title": "Fix conversion mapping for Qwen3VL-MoE",
+    "updated_at": "2026-02-16T18:02:05Z"
   },
   {
-    "additions": 10,
-    "author": "Rocketknight1",
+    "additions": 78,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "There are some flaky test failures that are very hard to track down. I suspect the modification of core mappings and `importlib.reload()` in `test_voxtral_tokenizer_converts_from_tekken` has some kind of weird race condition or state mutat\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/trl/issues/5088 We shouldn't use `rope_deltas` from prev `generation/forward` call if this is a new generation from scratch. This is already correctly implemented in `compute_3d_\u2026",
+    "changed_files": 14,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43634",
-    "created_at": "2026-01-30T16:14:31Z",
-    "deletions": 24,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43952",
+    "created_at": "2026-02-12T14:34:52Z",
+    "deletions": 31,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43634/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43634",
+    "files_url": "https://github.com/huggingface/transformers/pull/43952/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43952",
     "labels": [],
     "merged": true,
-    "number": 43634,
+    "number": 43952,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Don't modify mappings in tests",
-    "updated_at": "2026-01-30T16:48:06Z"
+    "title": "Fix qwen-vl position ids when generating several times",
+    "updated_at": "2026-02-12T16:22:39Z"
   },
   {
-    "additions": 324,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Makes sure that the attn implementation is set to all sub-configs. The `config.encoder.text_config` was not getting its attn set because we aren't passing it to `PreTrainedModel.__init__`. We can't change the model\u2026",
-    "changed_files": 8,
+    "additions": 4,
+    "author": "lordaarush",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes #43883 After #42270, `all_tied_weights_keys` is initialized in `post_init()`, but remote models loaded with `trust_remote_code=True` don't always call `post_init()` properly, causing `AttributeError` when load\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 17,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43633",
-    "created_at": "2026-01-30T15:50:24Z",
-    "deletions": 258,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43951",
+    "created_at": "2026-02-12T14:32:00Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43633/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43633",
+    "files_url": "https://github.com/huggingface/transformers/pull/43951/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43951",
     "labels": [],
-    "merged": true,
-    "number": 43633,
-    "review_comments_count": 15,
+    "merged": false,
+    "number": 43951,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "\ud83d\udea8 T5Gemma2 model structure",
-    "updated_at": "2026-02-04T14:44:55Z"
+    "title": "Fix AttributeError for remote models with trust_remote_code=True",
+    "updated_at": "2026-02-12T15:39:38Z"
   },
   {
-    "additions": 3,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "Disabling the validation of args torch distributions due to a) dummy values not always being sound b) seemingly torch internals failing checks (?); unsure not used to parsing torch export reports Following errors are in export otherwise: -\u2026",
-    "changed_files": 1,
+    "additions": 61,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a collection of spelling errors found throughout `src/transformers` in docstrings, comments, and user-facing error messages. ## Modifications Corrected the following typos across multiple files in `src/transfo\u2026",
+    "changed_files": 45,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43631",
-    "created_at": "2026-01-30T14:49:56Z",
-    "deletions": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43949",
+    "created_at": "2026-02-12T14:15:42Z",
+    "deletions": 61,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43631/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43631",
+    "files_url": "https://github.com/huggingface/transformers/pull/43949/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43949",
     "labels": [],
-    "merged": false,
-    "number": 43631,
-    "review_comments_count": 4,
+    "merged": true,
+    "number": 43949,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "[`Export`] Disable argument check in torch distributions for export",
-    "updated_at": "2026-03-14T19:59:23Z"
+    "title": "Fix typos in docstrings, comments, and error messages",
+    "updated_at": "2026-02-12T16:26:10Z"
   },
   {
-    "additions": 94,
-    "author": "ydshieh",
+    "additions": 147,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Provide more information like - the link to the hub repository containing the whole set of failed tests caused by the PR Also clean up some internal logic ## CI Results [Workflow Run \u2699\ufe0f](https://github.com/huggingfa\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? Fixes `get_num_of_image_tokens` in idefics3 and adds a test. Aloong the way fixes a few more models Reported in https://github.com/vllm-project/vllm/pull/34358",
+    "changed_files": 25,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 20,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43629",
-    "created_at": "2026-01-30T14:38:34Z",
-    "deletions": 44,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43948",
+    "created_at": "2026-02-12T13:52:37Z",
+    "deletions": 69,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43629/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43629",
+    "files_url": "https://github.com/huggingface/transformers/pull/43948/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43948",
     "labels": [],
     "merged": true,
-    "number": 43629,
+    "number": 43948,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Better new failures reporting for PR comment CI",
-    "updated_at": "2026-01-30T20:43:14Z"
-  },
-  {
-    "additions": 103,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Clean up the `new_failures` reports, also adding the \"trace\". It looks like https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/blob/main/2026-01-30/runs/1596-21518380210/ci_results_run_models_\u2026",
-    "changed_files": 2,
+    "title": "Fix `get_number_of_image_tokens`",
+    "updated_at": "2026-02-12T16:23:37Z"
+  },
+  {
+    "additions": 42,
+    "author": "casinca",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Hello, This is a complementary PR to #42769 (not yet pushed, at the time of writing) in order to fix #42754 Tests passed (at least the ones with a test.) <!-- Congratulations! You've made it this far! You're not qui\u2026",
+    "changed_files": 7,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43628",
-    "created_at": "2026-01-30T14:30:47Z",
-    "deletions": 111,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43947",
+    "created_at": "2026-02-12T13:20:24Z",
+    "deletions": 21,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43628/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43628",
+    "files_url": "https://github.com/huggingface/transformers/pull/43947/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43947",
     "labels": [],
     "merged": true,
-    "number": 43628,
+    "number": 43947,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Improve new failures reporting",
-    "updated_at": "2026-01-30T14:40:01Z"
+    "title": "fix: Better weight decay exclusion in `run_*_no\u2011trainer.py` examples",
+    "updated_at": "2026-02-12T16:24:43Z"
   },
   {
-    "additions": 495,
-    "author": "Suraj1199",
-    "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 1,
+    "additions": 5,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fix docker files: some issues for `[dev-torch]` and `kernels`",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43627",
-    "created_at": "2026-01-30T14:22:03Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43946",
+    "created_at": "2026-02-12T13:01:56Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43627/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43627",
+    "files_url": "https://github.com/huggingface/transformers/pull/43946/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43946",
     "labels": [],
-    "merged": false,
-    "number": 43627,
+    "merged": true,
+    "number": 43946,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add New Scratch Implementation for Transformers",
-    "updated_at": "2026-01-30T14:23:15Z"
+    "title": "Fix docker files",
+    "updated_at": "2026-02-12T13:11:21Z"
   },
   {
-    "additions": 40,
-    "author": "remi-or",
+    "additions": 25,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "The test `tests/generation/test_paged_attention.py::TestBatchGeneration::test_generate_batch_consistency` is failing because CB shuffles incoming request to maximize prefix caching. This is not a problem unless the user expects outputs to\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? As per the title. Some parts of the computation were not so exact. It should not change anything in general, but may improve perfs on constrained environments",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43626",
-    "created_at": "2026-01-30T14:12:40Z",
-    "deletions": 33,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43945",
+    "created_at": "2026-02-12T12:56:19Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43626/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43626",
+    "files_url": "https://github.com/huggingface/transformers/pull/43945/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43945",
     "labels": [],
     "merged": true,
-    "number": 43626,
+    "number": 43945,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Keep order of incoming requests",
-    "updated_at": "2026-02-03T09:59:47Z"
+    "title": "Improve memory allocator during loading",
+    "updated_at": "2026-02-13T11:25:07Z"
   },
   {
-    "additions": 3079,
-    "author": "ebezzam",
+    "additions": 4,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Adds VibeVoice ASR, branches off of (and hence depends on) https://github.com/huggingface/transformers/pull/43400 Original checkpoint: https://huggingface.co/microsoft/VibeVoice-ASR Original modeling code: https://g\u2026",
-    "changed_files": 30,
+    "body_excerpt": "We have flaky test failures in `tests/models/qwen3_omni_moe/test_modeling_qwen3_omni_moe.py::Qwen3OmniMoeThinkerForConditionalGenerationModelTest::test_generate_continue_from_past_key_values`. The cause is that the logic in this test drops\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43625",
-    "created_at": "2026-01-30T14:06:26Z",
-    "deletions": 311,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43944",
+    "created_at": "2026-02-12T12:55:52Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43625/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43625",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43944/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43944",
+    "labels": [],
     "merged": true,
-    "number": 43625,
-    "review_comments_count": 65,
+    "number": 43944,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Add VibeVoice ASR",
-    "updated_at": "2026-03-02T11:29:55Z"
+    "title": "Fix flaky test for multimodal LLMs",
+    "updated_at": "2026-02-12T13:30:17Z"
   },
   {
-    "additions": 1987,
-    "author": "jeeth-kataria",
-    "author_association": "NONE",
-    "body_excerpt": "## What does this PR do? Adds a Claude SKILL for PEFT (Parameter-Efficient Fine-Tuning) and LoRA guidance, complementing the transformers-api skill in #43340. ## Why is this needed? PEFT/LoRA is one of the most popular integrations with Tr\u2026",
-    "changed_files": 7,
+    "additions": 6,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Replaces legacy `.format()` calls with f-strings in several model conversion scripts (`convert_*.py`). ## Modifications Used `flynt` to apply safe transformations to string literals in: - `src/transformers/models/im\u2026",
+    "changed_files": 6,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43624",
-    "created_at": "2026-01-30T13:59:35Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43943",
+    "created_at": "2026-02-12T11:49:23Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43624/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43624",
+    "files_url": "https://github.com/huggingface/transformers/pull/43943/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43943",
     "labels": [],
-    "merged": false,
-    "number": 43624,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 43943,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add Claude Skill for PEFT/LoRA fine-tuning guidance",
-    "updated_at": "2026-02-03T13:08:11Z"
+    "title": "Modernize string formatting (f-strings) in conversion scripts",
+    "updated_at": "2026-02-12T14:20:49Z"
   },
   {
-    "additions": 77,
-    "author": "zucchini-nlp",
+    "additions": 20,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? as per title",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? Re-add the overwritten test that was mistakenly removed in https://github.com/huggingface/transformers/pull/43916",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43623",
-    "created_at": "2026-01-30T13:21:16Z",
-    "deletions": 47,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43942",
+    "created_at": "2026-02-12T11:20:08Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43623/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43623",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43942/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43942",
+    "labels": [],
     "merged": true,
-    "number": 43623,
-    "review_comments_count": 4,
+    "number": 43942,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Tie zamba weights correctly",
-    "updated_at": "2026-02-02T09:17:36Z"
+    "title": "Fix bark test",
+    "updated_at": "2026-02-12T11:34:22Z"
   },
   {
-    "additions": 1149,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Also fix a vllm regresion on experts. Allow Experts and Attention module to be used on their own, such as ```py import torch from transformers import AutoConfig from transformers.models.mixtral.mod\u2026",
-    "changed_files": 295,
+    "additions": 113,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to file I/O operations in several `examples/pytorch/` scripts. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`). This causes crashes (`UnicodeDeco\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43622",
-    "created_at": "2026-01-30T12:17:22Z",
-    "deletions": 1102,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43941",
+    "created_at": "2026-02-12T11:09:14Z",
+    "deletions": 48,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43622/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43622",
+    "files_url": "https://github.com/huggingface/transformers/pull/43941/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43941",
     "labels": [
-      "for patch"
+      "Code agent slop"
     ],
-    "merged": true,
-    "number": 43622,
-    "review_comments_count": 3,
+    "merged": false,
+    "number": 43941,
+    "review_comments_count": 22,
     "state": "closed",
-    "title": "Allow Attention and Experts to be used as standalone modules",
-    "updated_at": "2026-01-30T16:35:32Z"
+    "title": "Fix UnicodeDecodeError in PyTorch examples on Windows",
+    "updated_at": "2026-02-13T12:50:27Z"
   },
   {
-    "additions": 13,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. Following specs described by @hmellor for vllm compatibility, no logic change should be required. Only the following small changes to mappings should be enough",
+    "additions": 23,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh This PR fixes failed case: `tests/models/exaone_moe/test_modeling_exaone_moe.py::ExaoneMoeIntegrationTest::test_model_logits`, pls help review, thx!",
     "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43621",
-    "created_at": "2026-01-30T11:41:44Z",
-    "deletions": 12,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43621/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43621",
-    "labels": [
-      "for patch"
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
     ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43938",
+    "created_at": "2026-02-12T09:16:04Z",
+    "deletions": 6,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43938/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43938",
+    "labels": [],
     "merged": true,
-    "number": 43621,
+    "number": 43938,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[vllm compat] Separate renaming from conversion ops",
-    "updated_at": "2026-01-30T17:12:06Z"
+    "title": "Fix failed test case for exaone_moe model",
+    "updated_at": "2026-02-27T07:22:46Z"
   },
   {
-    "additions": 281,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, this is mainly due to the fact that we have little control over remote code which could lead to solutions like - Closes #43609 --> makes the whole mixin behave like a static holder for methods... - Modify methods/inherited cl\u2026",
-    "changed_files": 137,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43620",
-    "created_at": "2026-01-30T11:24:09Z",
-    "deletions": 288,
+    "additions": 47,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 3,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "canonical",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43936",
+    "created_at": "2026-02-12T08:34:03Z",
+    "deletions": 19,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43620/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43620",
+    "files_url": "https://github.com/huggingface/transformers/pull/43936/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43936",
     "labels": [],
     "merged": true,
-    "number": 43620,
-    "review_comments_count": 0,
+    "number": 43936,
+    "review_comments_count": 13,
     "state": "closed",
-    "title": "[`Rope`] Revert #43410 and make inheritance implicit again",
-    "updated_at": "2026-01-30T18:44:16Z"
+    "title": "Fix failed unit tests for moonshine_streaming model",
+    "updated_at": "2026-03-06T07:39:09Z"
   },
   {
-    "additions": 40,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, some models add or delete entries in tied weights depending on configuration. If we load two models consecutively with different configs, it fails to tie weights correctly I am copying it in `__init__`\u2026",
-    "changed_files": 4,
+    "additions": 1245,
+    "author": "MekkCyber",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds mlx quantization for mps devices leveraging the `kernels` library for pre-built kernels !!",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43619",
-    "created_at": "2026-01-30T10:43:38Z",
-    "deletions": 6,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43934",
+    "created_at": "2026-02-12T07:59:02Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43619/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43619",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43934/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43934",
+    "labels": [],
     "merged": true,
-    "number": 43619,
-    "review_comments_count": 8,
+    "number": 43934,
+    "review_comments_count": 20,
     "state": "closed",
-    "title": "Don't modify `tied_weight_keys` in-place",
-    "updated_at": "2026-01-30T15:46:02Z"
+    "title": "[Quantization] Add metal quantization for MPS devices!",
+    "updated_at": "2026-02-27T13:28:31Z"
   },
   {
-    "additions": 17,
-    "author": "kaixuanliu",
+    "additions": 66,
+    "author": "quic-meetkuma",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@zucchini-nlp pls help review, thx! We have to add back the changes in https://github.com/huggingface/transformers/pull/42523. As for llava_onevision model, in its checkpoint config file, the model's `tie_word_embeddings` is Flase, and mod\u2026",
-    "changed_files": 3,
+    "body_excerpt": "# What does this PR do? This PR adds hardware backend called \"qaic\" which is for Qualcomm's AI Accelerator. The inclusion is similar to any other hardware backend in the Trainer. With this the user will be able to use Qualcomm's AI Acceler\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43617",
-    "created_at": "2026-01-30T10:21:45Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43617/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43617",
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43933",
+    "created_at": "2026-02-12T06:14:52Z",
+    "deletions": 2,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43933/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43933",
     "labels": [],
     "merged": false,
-    "number": 43617,
+    "number": 43933,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix tie_word_embedding issue for llava_onevision model",
-    "updated_at": "2026-01-30T14:33:39Z"
+    "title": "Added support for qaic backend for Qualcomm's AI Accelerator",
+    "updated_at": "2026-02-17T16:53:38Z"
   },
   {
     "additions": 3,
-    "author": "yiliu30",
+    "author": "quic-meetkuma",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Signed-off-by: yiliu30 <yi4.liu@intel.com> # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so mak\u2026",
+    "body_excerpt": "# What does this PR do? It fixes minor issue in the gather_full_tensor function. The existing implementation tries to perform all_gather across all the ranks of distributed training. But when we talk about the case of TP+DP then we only ne\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43616",
-    "created_at": "2026-01-30T08:45:18Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43932",
+    "created_at": "2026-02-12T05:49:32Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43616/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43616",
+    "files_url": "https://github.com/huggingface/transformers/pull/43932/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43932",
     "labels": [],
     "merged": true,
-    "number": 43616,
-    "review_comments_count": 2,
+    "number": 43932,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `FP8Expert` for DeepSeek R1",
-    "updated_at": "2026-02-02T11:55:53Z"
+    "title": "Inclusion of process_group in the gather_full_tensor function in tensor_parallel.py",
+    "updated_at": "2026-02-13T14:29:24Z"
   },
   {
-    "additions": 2,
-    "author": "kaixuanliu",
+    "additions": 25,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "@SunMarc this PR fixes failed case of `tests/quantization/bnb/test_mixed_int8.py::MixedInt8GPT2Test::test_generate_quality_config` for XPU, pls help review, thx!",
-    "changed_files": 1,
-    "cluster_id": "cluster-43324-12",
-    "cluster_ids": [
-      "cluster-43324-12"
-    ],
-    "cluster_role": "member",
+    "body_excerpt": "# What does this PR do? We can use PT 2.4.0 is_autocast_enabled changes to improve code.",
+    "changed_files": 15,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43615",
-    "created_at": "2026-01-30T05:17:49Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43930",
+    "created_at": "2026-02-12T02:04:11Z",
+    "deletions": 20,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43615/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43615",
+    "files_url": "https://github.com/huggingface/transformers/pull/43930/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43930",
     "labels": [],
     "merged": true,
-    "number": 43615,
+    "number": 43930,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add XPU expected output for MixedInt8GPT2Test",
-    "updated_at": "2026-01-30T16:49:53Z"
+    "title": "Improve use of torch.is_autocast_enabled",
+    "updated_at": "2026-02-17T00:42:39Z"
   },
   {
-    "additions": 2,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "We need to add contiguous operation in sdpa stage for xpu as well for this model so as we can use compile mode. pls help review, thx! @IlyasMoutawwakil",
+    "additions": 174,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "the second part to #43900. this pr focuses on customizing tokenizers: - training your own - passing a custom vocabulary to `vocab` and `merges` - subclassing `TokenizersBackend`",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43614",
-    "created_at": "2026-01-30T03:45:47Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43929",
+    "created_at": "2026-02-11T23:20:18Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43614/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43614",
+    "files_url": "https://github.com/huggingface/transformers/pull/43929/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43929",
     "labels": [],
     "merged": true,
-    "number": 43614,
+    "number": 43929,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add contiguous operation for diffllama model for xpu to enable compile mode.",
-    "updated_at": "2026-01-30T15:04:20Z"
+    "title": "[docs] customizing tokenizers",
+    "updated_at": "2026-02-17T17:15:32Z"
   },
   {
-    "additions": 1347,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "Add pipeline for sam/sam2/edgetam/sam3_tracker task",
-    "changed_files": 19,
+    "additions": 48,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following failing Dia use case was identified and fixed in this PR: \u2192 Tests that created `DiaConfig` with custom token IDs (`eos_token_id=97` for a `vocab_size=100`) failed because saving then reloading the co\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43613",
-    "created_at": "2026-01-30T03:00:57Z",
-    "deletions": 12,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43928",
+    "created_at": "2026-02-11T19:59:42Z",
+    "deletions": 15,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43613/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43613",
+    "files_url": "https://github.com/huggingface/transformers/pull/43928/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43928",
     "labels": [],
-    "merged": false,
-    "number": 43613,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Add Promptable Visual Segmentation pipeline",
-    "updated_at": "2026-02-03T16:17:10Z"
+    "merged": true,
+    "number": 43928,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "fix(models): Preserve custom token IDs through DiaConfig save and load",
+    "updated_at": "2026-02-13T10:56:31Z"
   },
   {
-    "additions": 1143,
-    "author": "yonigozlan",
+    "additions": 24,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "Add pipeline for SAM3's PCS task",
-    "changed_files": 13,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43612",
-    "created_at": "2026-01-30T02:53:37Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43926",
+    "created_at": "2026-02-11T19:45:46Z",
+    "deletions": 105,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43612/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43612",
+    "files_url": "https://github.com/huggingface/transformers/pull/43926/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43926",
     "labels": [],
-    "merged": false,
-    "number": 43612,
-    "review_comments_count": 4,
-    "state": "open",
-    "title": "Add Promptable Concept Segmentation pipeline",
-    "updated_at": "2026-02-03T14:49:14Z"
+    "merged": true,
+    "number": 43926,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "[Deespeed] fix WeightConverter.convert() use",
+    "updated_at": "2026-02-16T14:07:30Z"
   },
   {
-    "additions": 1,
-    "author": "KOKOSde",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Fix a spelling typo in the dynamic weight loading / weight converter documentation. **Change**: `formated` \u2192 `formatted` in `docs/source/en/weightconverter.md` ## Test plan - N/A (documentation-only change) Made with [Cursor](ht\u2026",
-    "changed_files": 1,
+    "additions": 5,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Adds explicit `encoding=\"utf-8\"` to `open()` calls in `.circleci/create_circleci_config.py` and `.circleci/parse_test_outputs.py`. ## The Problem On Windows, `open()` defaults to the system encoding (often `cp1252`)\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43610",
-    "created_at": "2026-01-30T02:06:10Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43925",
+    "created_at": "2026-02-11T19:18:51Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43610/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43610",
+    "files_url": "https://github.com/huggingface/transformers/pull/43925/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43925",
     "labels": [],
     "merged": true,
-    "number": 43610,
+    "number": 43925,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Docs: fix typo in weight converter guide",
-    "updated_at": "2026-02-03T15:38:52Z"
+    "title": "Add explicit utf-8 encoding to CircleCI scripts for Windows compatibility",
+    "updated_at": "2026-02-12T13:59:02Z"
   },
   {
-    "additions": 242,
+    "additions": 576,
     "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "As per title, draft to show what would be roughly needed",
-    "changed_files": 3,
+    "body_excerpt": "WIP",
+    "changed_files": 48,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43609",
-    "created_at": "2026-01-29T21:15:10Z",
-    "deletions": 212,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43924",
+    "created_at": "2026-02-11T17:35:45Z",
+    "deletions": 726,
     "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43609/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43609",
+    "files_url": "https://github.com/huggingface/transformers/pull/43924/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43924",
     "labels": [],
     "merged": false,
-    "number": 43609,
-    "review_comments_count": 1,
+    "number": 43924,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "[`Attn`] More old mask APIs",
+    "updated_at": "2026-02-11T22:34:32Z"
+  },
+  {
+    "additions": 0,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "The `no_grad` decorators created problems, especially when new tensors are created! Reverting that PR until we can narrow down exactly which areas are problematic.",
+    "changed_files": 9,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43923",
+    "created_at": "2026-02-11T16:39:30Z",
+    "deletions": 17,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43923/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43923",
+    "labels": [],
+    "merged": true,
+    "number": 43923,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Rope`] Re-Enable remote code",
-    "updated_at": "2026-01-30T17:09:34Z"
+    "title": "Revert #43897",
+    "updated_at": "2026-02-11T17:21:07Z"
   },
   {
-    "additions": 1,
-    "author": "tobyliu2004",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026tibility # What does this PR do? Fixes #43582 Fixes TypeError on Apple Silicon (MPS) when loading models with quantization by ensuring `byte_count // 2` returns a Python int. ## The Issue On line 4762 in `modeling_utils.py`, `torch.empty(\u2026",
+    "additions": 2,
+    "author": "DimiChatzipavlis",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes a hard crash (`AttributeError`) in `src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py` caused by `collections.MutableMapping`, which was removed in Python 3.10. ## The Problem The script imp\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43608",
-    "created_at": "2026-01-29T18:37:54Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43922",
+    "created_at": "2026-02-11T16:22:54Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43608/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43608",
+    "files_url": "https://github.com/huggingface/transformers/pull/43922/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43922",
     "labels": [],
     "merged": true,
-    "number": 43608,
+    "number": 43922,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Cast byte_count to int in caching_allocator_warmup for MPS compatibility",
-    "updated_at": "2026-02-02T16:55:51Z"
+    "title": "Fix AttributeError in OwlViT conversion script for Python 3.10+",
+    "updated_at": "2026-02-12T08:33:47Z"
   },
   {
-    "additions": 15,
-    "author": "harshaljanjani",
+    "additions": 3,
+    "author": "svlandeg",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 `BarkCausalModel` and `BarkFineModel` incorrectly inferred the device, causing device mismatches when `enable_cpu_offload()` is used. For more details on reproducing the bug, please visit the linked issue! Fixes\u2026",
-    "changed_files": 1,
+    "body_excerpt": "Hi \ud83d\udc4b Typer maintainer here. # What does this PR do? We used to offer `typer-slim` as a more light-weight package, which didn't depend on `rich` and `shellingham`. Unfortuntately, the way it was set up meant that we ran into issues with pac\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43607",
-    "created_at": "2026-01-29T18:31:20Z",
-    "deletions": 5,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43921",
+    "created_at": "2026-02-11T15:54:04Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43607/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43607",
+    "files_url": "https://github.com/huggingface/transformers/pull/43921/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43921",
     "labels": [],
     "merged": true,
-    "number": 43607,
+    "number": 43921,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Fix suno/bark-small CPU offload device mismatch causing CI failures",
-    "updated_at": "2026-01-30T13:21:37Z"
+    "title": "Depend on `typer` instead of `typer-slim`",
+    "updated_at": "2026-02-16T19:02:58Z"
   },
   {
-    "additions": 4,
-    "author": "itazap",
+    "additions": 1,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": null,
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? As per the title. This is a core file, and we cannot allow to change it without triggering everything - see https://github.com/huggingface/transformers/pull/43897#issuecomment-3885203477 as well",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43605",
-    "created_at": "2026-01-29T16:22:35Z",
-    "deletions": 22,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43920",
+    "created_at": "2026-02-11T15:42:20Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43605/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43605",
+    "files_url": "https://github.com/huggingface/transformers/pull/43920/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43920",
     "labels": [],
-    "merged": false,
-    "number": 43605,
+    "merged": true,
+    "number": 43920,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "undo processing test",
-    "updated_at": "2026-02-02T10:55:50Z"
+    "title": "Changes to cache_utils should trigger all tests all the time",
+    "updated_at": "2026-02-11T15:51:31Z"
   },
   {
-    "additions": 2,
-    "author": "vaibhav-research",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `TokenizersBackend._patch_mistral_regex()` is a Mistral-specific tokenizer patch, but the current implementation may call `huggingface_hub.model_info()` during detection. That triggers an HTTP request to `/api/model\u2026",
+    "additions": 19,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR enables to use `sync_each_batch` argument when passing `gradient_accumulation_kwargs` in `AcceleratorConfig`. I'm also removing `adjust_scheduler` docstring as it is not used/enabled for now as we don't prep\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43603",
-    "created_at": "2026-01-29T15:30:56Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43919",
+    "created_at": "2026-02-11T15:37:05Z",
+    "deletions": 12,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43603/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43603",
+    "files_url": "https://github.com/huggingface/transformers/pull/43919/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43919",
     "labels": [],
-    "merged": false,
-    "number": 43603,
-    "review_comments_count": 7,
-    "state": "open",
-    "title": "Fix unintended Hub metadata calls from _patch_mistral_regex",
-    "updated_at": "2026-01-29T17:27:09Z"
+    "merged": true,
+    "number": 43919,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "Fix sync gradient",
+    "updated_at": "2026-02-13T14:38:09Z"
   },
   {
-    "additions": 22,
-    "author": "remi-or",
+    "additions": 2,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "This PR fixes two issues related to CB and serve: - when the `max_length` argument as set to `None` when creating a request, CB had an error because it always expected max_length to be an int. This is now an expected behavior, and we have\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43594",
-    "created_at": "2026-01-29T13:06:19Z",
-    "deletions": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43918",
+    "created_at": "2026-02-11T15:26:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43594/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43594",
+    "files_url": "https://github.com/huggingface/transformers/pull/43918/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43918",
     "labels": [],
-    "merged": true,
-    "number": 43594,
+    "merged": false,
+    "number": 43918,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] [Serve] Fix broken serve tests",
-    "updated_at": "2026-01-30T13:10:52Z"
+    "title": "foo",
+    "updated_at": "2026-02-11T15:46:51Z"
   },
   {
-    "additions": 32,
-    "author": "Vallabh-1504",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes a crash when initializing `Qwen3OmniMoeTalkerCodePredictorConfig` due to a missing attribute reference. Specifically, it: 1. **Removes** the reference to the non-existent `use_sliding_window` attribute\u2026",
-    "changed_files": 3,
+    "additions": 1157,
+    "author": "IlyasMoutawwakil",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Integrates a library wide monkey patching API to fix #43284 and allow things like restructuring the expert weights and fusing the qkv projections. A concrete and complete example: <details> ```py from typing import\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43593",
-    "created_at": "2026-01-29T13:01:01Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43917",
+    "created_at": "2026-02-11T15:08:30Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43593/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43593",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43917/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43917",
+    "labels": [],
     "merged": true,
-    "number": 43593,
-    "review_comments_count": 10,
+    "number": 43917,
+    "review_comments_count": 38,
     "state": "closed",
-    "title": "fix: AttributeError for Qwen3_omni_moe",
-    "updated_at": "2026-02-04T10:44:30Z"
+    "title": "Model patching API",
+    "updated_at": "2026-03-02T10:10:50Z"
   },
   {
-    "additions": 223,
-    "author": "zucchini-nlp",
+    "additions": 616,
+    "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43525 Fixes https://github.com/huggingface/transformers/issues/43572 Adds missing `pad_token_id` and `tie_word_embeddings` to config classes with their defaul\u2026",
-    "changed_files": 38,
+    "body_excerpt": "# What does this PR do? As per the title. See discussion in https://github.com/huggingface/transformers/issues/43885",
+    "changed_files": 296,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43592",
-    "created_at": "2026-01-29T12:42:02Z",
-    "deletions": 21,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43916",
+    "created_at": "2026-02-11T14:36:44Z",
+    "deletions": 721,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43592/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43592",
-    "labels": [
-      "for patch"
-    ],
+    "files_url": "https://github.com/huggingface/transformers/pull/43916/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43916",
+    "labels": [],
     "merged": true,
-    "number": 43592,
+    "number": 43916,
     "review_comments_count": 2,
     "state": "closed",
-    "title": "Fixes configuration default values",
-    "updated_at": "2026-01-30T11:37:26Z"
+    "title": "Harmonize `input_embeds` to `inputs_embeds` everywhere",
+    "updated_at": "2026-02-11T16:03:56Z"
   },
   {
-    "additions": 15,
-    "author": "itazap",
+    "additions": 272,
+    "author": "merveenoyan",
     "author_association": "MEMBER",
-    "body_excerpt": "custom tokenizers fail on super._init_() call that tries to update vocab size before all vocab attrs are defined",
-    "changed_files": 2,
+    "body_excerpt": "notebook to converted models e2e inference: https://colab.research.google.com/drive/1g-Vc-Zvjy_STNEUbWJhYDTpFyT7o6TGl?usp=sharing models: https://huggingface.co/merve/PaddleOCR-VL-hf https://huggingface.co/merve/PaddleOCR-VL-1.5-hf @molbap",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43591",
-    "created_at": "2026-01-29T12:25:21Z",
-    "deletions": 4,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43915",
+    "created_at": "2026-02-11T14:30:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43591/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43591",
-    "labels": [
-      "for patch"
-    ],
-    "merged": true,
-    "number": 43591,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "custom tok init fix",
-    "updated_at": "2026-02-02T17:00:18Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/43915/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43915",
+    "labels": [],
+    "merged": false,
+    "number": 43915,
+    "review_comments_count": 0,
+    "state": "open",
+    "title": "add PaddleOCR-VL conversion",
+    "updated_at": "2026-02-11T14:40:51Z"
   },
   {
-    "additions": 5117,
-    "author": "molbap",
+    "additions": 2409,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? In model additions, we often see old standards not using `check_model_inputs`, `can_return_tuple` and it's often a first review comment/something that can slip through. Doing a wide scan to try to remove all occurre\u2026",
-    "changed_files": 234,
+    "body_excerpt": "# What does this PR do? This PR reorder all methods from Trainer. No modifications was done to the methods itself. With this final PR, all the major changes should be done and we can focus on simplifying the core logic + testing. Reorderin\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 53,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43590",
-    "created_at": "2026-01-29T12:23:56Z",
-    "deletions": 12955,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43914",
+    "created_at": "2026-02-11T13:36:39Z",
+    "deletions": 2323,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43590/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43590",
+    "files_url": "https://github.com/huggingface/transformers/pull/43914/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43914",
     "labels": [],
     "merged": true,
-    "number": 43590,
-    "review_comments_count": 135,
+    "number": 43914,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Remove many output_attentions and other traced outputs on 100+ models ",
-    "updated_at": "2026-03-12T19:08:41Z"
+    "title": "Reorder Trainer methods",
+    "updated_at": "2026-02-13T17:07:59Z"
   },
   {
-    "additions": 256,
+    "additions": 38,
     "author": "Cyrilvallez",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per the title. This simplifies the use of `LoadStateDictInfo` a bit everywhere, and makes it clear that the entries within the struct are mutated in-place all the time.",
-    "changed_files": 6,
+    "body_excerpt": "# What does this PR do? As per the title. Fixes https://github.com/huggingface/transformers/issues/43299#issuecomment-3849688073",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43589",
-    "created_at": "2026-01-29T12:18:14Z",
-    "deletions": 271,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43913",
+    "created_at": "2026-02-11T12:44:05Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43589/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43589",
+    "files_url": "https://github.com/huggingface/transformers/pull/43913/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43913",
     "labels": [],
-    "merged": true,
-    "number": 43589,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 43913,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Simplify loading structure",
-    "updated_at": "2026-02-02T13:28:43Z"
+    "title": "Add a sentinel mechanism in Transpose and fix qwen3_vl_moe weight mapping",
+    "updated_at": "2026-02-16T15:53:09Z"
   },
   {
-    "additions": 44,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixes https://github.com/huggingface/transformers/issues/43540",
-    "changed_files": 2,
+    "additions": 790,
+    "author": "JaredforReal",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module class - fix mlp layers mismatch - implement Attention.forward() # Current st\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43588",
-    "created_at": "2026-01-29T11:35:25Z",
-    "deletions": 8,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43912",
+    "created_at": "2026-02-11T12:37:41Z",
+    "deletions": 348,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43588/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43588",
+    "files_url": "https://github.com/huggingface/transformers/pull/43912/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43912",
     "labels": [],
     "merged": true,
-    "number": 43588,
-    "review_comments_count": 0,
+    "number": 43912,
+    "review_comments_count": 18,
     "state": "closed",
-    "title": "Qwen3 omni - fix get video features",
-    "updated_at": "2026-01-29T14:55:30Z"
+    "title": "[fix][wip] GlmMoeDsa: try implement DSA",
+    "updated_at": "2026-03-04T16:30:36Z"
   },
   {
-    "additions": 336,
-    "author": "zucchini-nlp",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Check docstring currently doesn't check modular files. So if a class is defined in modular without any \"copying\" and has a bad docstring, we do not check it. Instead we check the auto-generated code, which sometimes\u2026",
-    "changed_files": 5,
+    "additions": 1,
+    "author": "pavel-esir",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Without this like `AutoTokenizer.from_pretrained(...)` does not create `LlamaTokenizer` object. ## Before submitting - [x] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the ca\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43587",
-    "created_at": "2026-01-29T11:18:12Z",
-    "deletions": 48,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43911",
+    "created_at": "2026-02-11T10:53:54Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43587/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43587",
+    "files_url": "https://github.com/huggingface/transformers/pull/43911/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43911",
     "labels": [],
     "merged": false,
-    "number": 43587,
+    "number": 43911,
     "review_comments_count": 0,
     "state": "open",
-    "title": "[WIP] Check docstring runs on modular files",
-    "updated_at": "2026-01-29T11:27:23Z"
+    "title": "add Llama to mapping names in tokenization_auto.py",
+    "updated_at": "2026-02-18T10:58:59Z"
   },
   {
-    "additions": 8,
-    "author": "Wauplin",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR sets `HF_HUB_DOWNLOAD_TIMEOUT=60` as environment variable when running tests. I've also added [`pytest-env`](https://github.com/pytest-dev/pytest-env) as a dev dependency to do that. I've also reverted https\u2026",
+    "additions": 17,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43910",
+    "created_at": "2026-02-11T09:43:08Z",
+    "deletions": 2,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43910/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43910",
+    "labels": [],
+    "merged": true,
+    "number": 43910,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Update expected output for Jais2 model tests",
+    "updated_at": "2026-02-27T08:28:06Z"
+  },
+  {
+    "additions": 13,
+    "author": "kaixuanliu",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "@ydshieh , pls help review, thx!",
+    "changed_files": 1,
+    "cluster_id": "cluster-43324-21",
+    "cluster_ids": [
+      "cluster-43324-21"
+    ],
+    "cluster_role": "member",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43907",
+    "created_at": "2026-02-11T08:30:31Z",
+    "deletions": 36,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43907/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43907",
+    "labels": [],
+    "merged": true,
+    "number": 43907,
+    "review_comments_count": 0,
+    "state": "closed",
+    "title": "update glm image model expected out for tests",
+    "updated_at": "2026-02-27T07:21:14Z"
+  },
+  {
+    "additions": 31,
+    "author": "YangKai0616",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "Per the title. Enable the corresponding tests and re-enable the tests that were skipped before.",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43586",
-    "created_at": "2026-01-29T10:46:31Z",
-    "deletions": 2,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43905",
+    "created_at": "2026-02-11T05:31:42Z",
+    "deletions": 23,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43586/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43586",
+    "files_url": "https://github.com/huggingface/transformers/pull/43905/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43905",
     "labels": [],
     "merged": true,
-    "number": 43586,
-    "review_comments_count": 0,
+    "number": 43905,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Increase default ReadTimeout in tests",
-    "updated_at": "2026-01-29T12:10:13Z"
+    "title": "XPU now supports backward for the FA2 fixed path",
+    "updated_at": "2026-02-26T10:48:28Z"
   },
   {
-    "additions": 76,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The change introduced in https://github.com/huggingface/transformers/pull/43261 with `__setattr__` is quite dangerous, as `source_patterns` and `target_patterns` live together and cannot be unentangled from one anot\u2026",
-    "changed_files": 2,
+    "additions": 3,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? removes two unused dependencies.",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43585",
-    "created_at": "2026-01-29T10:22:53Z",
-    "deletions": 92,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43904",
+    "created_at": "2026-02-11T02:48:18Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43585/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43585",
+    "files_url": "https://github.com/huggingface/transformers/pull/43904/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43904",
     "labels": [],
     "merged": true,
-    "number": 43585,
-    "review_comments_count": 1,
+    "number": 43904,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Regex post processing in loading",
-    "updated_at": "2026-01-29T14:10:28Z"
+    "title": "Remove unused dependencies",
+    "updated_at": "2026-02-18T01:04:31Z"
   },
   {
-    "additions": 25,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Just reuse the local files as much as possible",
+    "additions": 0,
+    "author": "math-hiyoko",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43584",
-    "created_at": "2026-01-29T09:38:40Z",
-    "deletions": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43903",
+    "created_at": "2026-02-11T01:44:06Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43584/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43584",
+    "files_url": "https://github.com/huggingface/transformers/pull/43903/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43903",
     "labels": [],
     "merged": true,
-    "number": 43584,
+    "number": 43903,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix mistral checkpoint loading in `utils/fetch_hub_objects_for_ci.py`: avoid too many requests and/or timeout",
-    "updated_at": "2026-01-29T11:29:13Z"
+    "title": "fix: TextClassificationPipeline docs mentioning deprecated return_all_scores",
+    "updated_at": "2026-02-11T17:13:26Z"
   },
   {
-    "additions": 3,
-    "author": "YangKai0616",
+    "additions": 23,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? `torch.histc` with deterministic algorithms enabled behaves differently across devices: **CPU**: only supports float input **CUDA**: only supports int input This PR updates `grouped_mm_experts_forward` to use the ap\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? This PR fixes and removes more outdated documentation contents <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the t\u2026",
+    "changed_files": 15,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43583",
-    "created_at": "2026-01-29T09:02:59Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43902",
+    "created_at": "2026-02-11T01:39:07Z",
+    "deletions": 164,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43583/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43583",
+    "files_url": "https://github.com/huggingface/transformers/pull/43902/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43902",
     "labels": [],
     "merged": true,
-    "number": 43583,
-    "review_comments_count": 1,
+    "number": 43902,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[MoE] Use int input for histc on CUDA to support deterministic algorithms",
-    "updated_at": "2026-01-30T11:39:52Z"
+    "title": "Fix old tech stack in doc",
+    "updated_at": "2026-02-11T13:23:56Z"
   },
   {
-    "additions": 14,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- quantization: @SunMarc @MekkCyber",
-    "changed_files": 2,
+    "additions": 165,
+    "author": "stevhliu",
+    "author_association": "MEMBER",
+    "body_excerpt": "refactors tokenizer docs for v5: - describes new unified tokenization backend architecture and how it works - empty tokenizer initialization with model-specific tokenizer - update structure to mirror developer workflow (load -> encode/deco\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43581",
-    "created_at": "2026-01-29T07:45:14Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43900",
+    "created_at": "2026-02-11T00:42:19Z",
+    "deletions": 314,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43581/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43581",
+    "files_url": "https://github.com/huggingface/transformers/pull/43900/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43900",
     "labels": [],
     "merged": true,
-    "number": 43581,
-    "review_comments_count": 1,
+    "number": 43900,
+    "review_comments_count": 9,
     "state": "closed",
-    "title": "fix fbgemm fp8 multi-device load failure.",
-    "updated_at": "2026-02-10T14:51:49Z"
+    "title": "[docs] refactor tokenizer docs",
+    "updated_at": "2026-02-17T17:50:01Z"
   },
   {
-    "additions": 2,
-    "author": "kaixuanliu",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "We meet 2 failed test cases for lighton_ocr model: ``` tests/models/lighton_ocr/test_modeling_lighton_ocr.py::LightOnOcrForConditionalGenerationModelTest::test_torch_export tests/models/lighton_ocr/test_modeling_lighton_ocr.py::LightOnOcrF\u2026",
+    "additions": 5,
+    "author": "itazap",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43580",
-    "created_at": "2026-01-29T07:24:34Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43898",
+    "created_at": "2026-02-10T17:37:34Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43580/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43580",
+    "files_url": "https://github.com/huggingface/transformers/pull/43898/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43898",
     "labels": [],
-    "merged": false,
-    "number": 43580,
+    "merged": true,
+    "number": 43898,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "skip torch export tests for lighton_ocr model",
-    "updated_at": "2026-01-29T13:49:55Z"
+    "title": "adding BC for custom toks accessing slow tok attrs deprecated in v5",
+    "updated_at": "2026-02-10T21:21:33Z"
   },
   {
-    "additions": 13,
-    "author": "YangKai0616",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Hi @ydshieh , please help review this PR, thanks!",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "Rocketknight1",
+    "author_association": "MEMBER",
+    "body_excerpt": "Although our cache update methods are usually used in inference, when grad is disabled anyway, there seem to be some edge cases where they cause problems with compilation and gradient computation. Since we never want to propagate gradient\u2026",
+    "changed_files": 9,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43579",
-    "created_at": "2026-01-29T07:19:36Z",
-    "deletions": 4,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43897",
+    "created_at": "2026-02-10T17:15:15Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43579/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43579",
+    "files_url": "https://github.com/huggingface/transformers/pull/43897/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43897",
     "labels": [],
     "merged": true,
-    "number": 43579,
+    "number": 43897,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add XPU support to the tests for solar_open",
-    "updated_at": "2026-01-29T13:18:47Z"
+    "title": "Decorate cache updates with no_grad, just in case",
+    "updated_at": "2026-02-11T15:31:40Z"
   },
   {
-    "additions": 6,
-    "author": "LuJunru",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "additions": 427,
+    "author": "ebezzam",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As discussed in https://github.com/huggingface/transformers/pull/43820#pullrequestreview-3780031785, expected outputs of DAC need to be recomputed Reasoning: outputs were changed with https://github.com/huggingface/\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43578",
-    "created_at": "2026-01-29T06:52:33Z",
-    "deletions": 15,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43896",
+    "created_at": "2026-02-10T16:49:32Z",
+    "deletions": 304,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43578/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43578",
+    "files_url": "https://github.com/huggingface/transformers/pull/43896/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43896",
     "labels": [],
     "merged": true,
-    "number": 43578,
-    "review_comments_count": 3,
+    "number": 43896,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Update test of Youtu-LLM to pr-aligned repos",
-    "updated_at": "2026-01-29T08:47:09Z"
+    "title": "Fix expected DAC outputs due to (old) change in CI settings.",
+    "updated_at": "2026-02-10T17:47:59Z"
   },
   {
-    "additions": 104,
-    "author": "jp1924",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? In the latest version of transformers, when initializing siglip with ZeRO3 applied, the following error occurs: ```python Fan in and fan out can not be computed for tensor with fewer than 2 dimensions File \"/usr/loc\u2026",
-    "changed_files": 6,
+    "additions": 8,
+    "author": "SunMarc",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This PR reverts `place_model_on_device` back to a property. I prefer not introducing new args for now.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43574",
-    "created_at": "2026-01-29T01:26:40Z",
-    "deletions": 105,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43895",
+    "created_at": "2026-02-10T15:06:36Z",
+    "deletions": 7,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43574/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43574",
+    "files_url": "https://github.com/huggingface/transformers/pull/43895/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43895",
     "labels": [],
     "merged": true,
-    "number": 43574,
-    "review_comments_count": 5,
+    "number": 43895,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Preventing initialization of siglip's lecun_normal_, default_flax_embed_init in ZeRO3",
-    "updated_at": "2026-02-20T07:32:11Z"
+    "title": "revert place_model_on_device to property",
+    "updated_at": "2026-02-11T10:37:06Z"
   },
   {
-    "additions": 14,
-    "author": "tobyliu2004",
+    "additions": 165,
+    "author": "JaredforReal",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes #43572 Adds the missing `pad_token_id` parameter to `StableLmConfig` to resolve the `AttributeError` when creating StableLM models from config. ## Changes made: - Added `pad_token_id` parameter to `__init__`\u2026",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? - fix k_norm as layernorm - add index_head_dim to config - rewrite GlmMoeDsaConfig from PreTrainedConfig - rewrite indexer as an nn.Module - fix mlp layers mismatch <!-- Congratulations! You've made it this far! You\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43573",
-    "created_at": "2026-01-29T01:10:03Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43894",
+    "created_at": "2026-02-10T14:56:42Z",
+    "deletions": 124,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43573/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43573",
+    "files_url": "https://github.com/huggingface/transformers/pull/43894/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43894",
     "labels": [],
     "merged": false,
-    "number": 43573,
-    "review_comments_count": 0,
+    "number": 43894,
+    "review_comments_count": 8,
     "state": "closed",
-    "title": "Fix: Add missing pad_token_id to StableLmConfig",
-    "updated_at": "2026-01-29T18:17:40Z"
+    "title": "[fix] GlmMoeDsa model",
+    "updated_at": "2026-03-04T16:30:34Z"
   },
   {
-    "additions": 2,
-    "author": "DowellHd",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? This PR adds GIT to the auto-model mapping for the visual-question-answering pipeline and updates the corresponding GIT pipeline test mapping. This ensures that GIT models (e.g. GitForCausalLM) are correctly recogni\u2026",
-    "changed_files": 2,
+    "additions": 55,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Fixes #43854. Long term we should probably have it in the post-init to share them @Cyrilvallez I am lazy today",
+    "changed_files": 20,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43571",
-    "created_at": "2026-01-28T21:45:00Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43893",
+    "created_at": "2026-02-10T14:16:10Z",
+    "deletions": 47,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43571/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43571",
+    "files_url": "https://github.com/huggingface/transformers/pull/43893/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43893",
     "labels": [],
-    "merged": false,
-    "number": 43571,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 43893,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "Add GIT support to visual-question-answering pipeline",
-    "updated_at": "2026-01-30T13:09:56Z"
+    "title": "Move `_keys_to_ignore_on_load_missing` for now",
+    "updated_at": "2026-02-11T13:47:43Z"
   },
   {
-    "additions": 50,
-    "author": "ydshieh",
+    "additions": 1,
+    "author": "zucchini-nlp",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Use git checkout command instead github api (otherwise we can't push the changes)",
+    "body_excerpt": "# What does this PR do?",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43570",
-    "created_at": "2026-01-28T17:29:16Z",
-    "deletions": 87,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43892",
+    "created_at": "2026-02-10T13:07:29Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43570/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43570",
+    "files_url": "https://github.com/huggingface/transformers/pull/43892/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43892",
     "labels": [],
     "merged": true,
-    "number": 43570,
+    "number": 43892,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix repo. consistency bot (push permission issue)",
-    "updated_at": "2026-01-28T17:38:25Z"
+    "title": "Update hub metadata",
+    "updated_at": "2026-02-10T15:53:51Z"
   },
   {
-    "additions": 509,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "This PR refactors all of the logic linked to the inputs and outputs of continuous batching into a new class `ContinuousBatchingIOs` itself in a new file. This will be quite useful when we introduce async scheduling, because there will be t\u2026",
-    "changed_files": 4,
+    "additions": 36,
+    "author": "Mercury0226",
+    "author_association": "NONE",
+    "body_excerpt": "Closed PR. Test-only proposal for #7715.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43569",
-    "created_at": "2026-01-28T17:28:49Z",
-    "deletions": 416,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43891",
+    "created_at": "2026-02-10T12:04:08Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43569/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43569",
-    "labels": [],
-    "merged": true,
-    "number": 43569,
+    "files_url": "https://github.com/huggingface/transformers/pull/43891/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43891",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43891,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Refactor logic for inputs and outputs outside of the main API",
-    "updated_at": "2026-02-02T12:43:28Z"
+    "title": "tests: extend RAG retriever smoke coverage (#7715)",
+    "updated_at": "2026-02-10T13:26:43Z"
   },
   {
-    "additions": 410,
-    "author": "SunMarc",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR makes it easier to go through TrainingArguments ! This should help making trainer less bloated for the users. A nice first step to make Trainer simpler to use.",
+    "additions": 54,
+    "author": "Mercury0226",
+    "author_association": "NONE",
+    "body_excerpt": "Closed PR. Test-only proposal for #6045.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43568",
-    "created_at": "2026-01-28T17:07:41Z",
-    "deletions": 416,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43890",
+    "created_at": "2026-02-10T11:48:59Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43568/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43568",
-    "labels": [],
-    "merged": true,
-    "number": 43568,
-    "review_comments_count": 19,
+    "files_url": "https://github.com/huggingface/transformers/pull/43890/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43890",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43890,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Simplify TrainingArguments docstring",
-    "updated_at": "2026-02-03T12:52:10Z"
+    "title": "tests: add BART memory regression guard test (#6045)",
+    "updated_at": "2026-02-10T13:26:45Z"
   },
   {
-    "additions": 73,
+    "additions": 24,
     "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "Kind of a follow up to #43556 which fixed a bunch of wrong inheritance with gradient ckpting. Just dummy exchanged the proper flags and I didn't have to skip a lot of these tests",
-    "changed_files": 6,
+    "body_excerpt": "As per title",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43567",
-    "created_at": "2026-01-28T16:48:38Z",
-    "deletions": 86,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43889",
+    "created_at": "2026-02-10T11:25:01Z",
+    "deletions": 18,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43567/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43567",
+    "files_url": "https://github.com/huggingface/transformers/pull/43889/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43889",
     "labels": [],
     "merged": true,
-    "number": 43567,
+    "number": 43889,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[`Sam`] Fixup training flags",
-    "updated_at": "2026-02-02T11:12:21Z"
+    "title": "[`Jamba`] Fallback to slow path and warn instead of error out",
+    "updated_at": "2026-02-10T12:35:49Z"
   },
   {
-    "additions": 28,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? https://github.com/huggingface/transformers/pull/43523 broke Wav2Vec and a few others as they have their own `tie_weights`, which is actually not tying the weights... Who would have thought that changing the purpose\u2026",
-    "changed_files": 7,
+    "additions": 2074,
+    "author": "bhargav-patel-29",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds support for **Param-2-17B-MoE-A2.4B**, a large-scale Mixture-of-Experts (MoE) causal language model. Param-2-17B-MoE-A2.4B uses a **Hybrid Dense + MoE architecture** with 17B total parameters while acti\u2026",
+    "changed_files": 8,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43566",
-    "created_at": "2026-01-28T16:47:58Z",
-    "deletions": 7,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43888",
+    "created_at": "2026-02-10T11:02:13Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43566/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43566",
+    "files_url": "https://github.com/huggingface/transformers/pull/43888/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43888",
     "labels": [],
-    "merged": true,
-    "number": 43566,
+    "merged": false,
+    "number": 43888,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Fix Wav2vec and a few others",
-    "updated_at": "2026-01-28T17:08:53Z"
+    "state": "open",
+    "title": "Support for BharatGen's Param2MoE model architecture",
+    "updated_at": "2026-02-23T07:28:02Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
+    "additions": 12,
+    "author": "eustlb",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Try if the permission is correctly set.",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? MistralCommonBackend does not implement `added_tokens_decoder` so any call to PreTrainedTokenizerBase's `__repr__ `fails.",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43565",
-    "created_at": "2026-01-28T15:51:09Z",
-    "deletions": 19,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43887",
+    "created_at": "2026-02-10T10:55:08Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43565/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43565",
+    "files_url": "https://github.com/huggingface/transformers/pull/43887/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43887",
     "labels": [],
     "merged": true,
-    "number": 43565,
+    "number": 43887,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "check/fix repo. check bot workflow",
-    "updated_at": "2026-01-28T16:00:19Z"
+    "title": "[MistralCommonBackend] fix loading proc",
+    "updated_at": "2026-02-10T12:32:24Z"
   },
   {
-    "additions": 26,
-    "author": "ndeybach",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Avoids flooding the tty when using Sam3VideoModel ( when using propagate_in_video_iterator() ) harmonize argument with other propagate_in_video_iterator in other similar classes (EdgeTamVideoModel, Sam2VideoModel, S\u2026",
-    "changed_files": 2,
+    "additions": 20,
+    "author": "zucchini-nlp",
+    "author_association": "MEMBER",
+    "body_excerpt": "Fixes https://github.com/huggingface/transformers/issues/43878 After the refactor we started saving `out_features` and `stage_names` in timm backbone config, because it now also inherits from `BackboneConfigMixin`. But the modeling code wo\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43564",
-    "created_at": "2026-01-28T15:45:54Z",
-    "deletions": 12,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43886",
+    "created_at": "2026-02-10T10:10:09Z",
+    "deletions": 9,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43564/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43564",
+    "files_url": "https://github.com/huggingface/transformers/pull/43886/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43886",
     "labels": [],
     "merged": true,
-    "number": 43564,
+    "number": 43886,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "add an option to disable Sam3VideoModel progress bar",
-    "updated_at": "2026-01-28T16:20:05Z"
+    "title": "Timm backbone saves and loads `out_features`",
+    "updated_at": "2026-02-12T15:57:04Z"
   },
   {
-    "additions": 9,
-    "author": "ydshieh2",
-    "author_association": "CONTRIBUTOR",
+    "additions": 2,
+    "author": "ydshieh",
+    "author_association": "MEMBER",
     "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
+    "changed_files": 2,
     "cluster_id": "cluster-43488-10",
     "cluster_ids": [
       "cluster-43488-10"
     ],
     "cluster_role": "member",
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43563",
-    "created_at": "2026-01-28T15:20:22Z",
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43884",
+    "created_at": "2026-02-10T09:55:40Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43563/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43563",
+    "files_url": "https://github.com/huggingface/transformers/pull/43884/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43884",
     "labels": [],
     "merged": false,
-    "number": 43563,
+    "number": 43884,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[don't merge] check style bot from fored repo",
-    "updated_at": "2026-01-28T18:41:26Z"
+    "title": "don't merge check workflow",
+    "updated_at": "2026-02-13T02:03:43Z"
   },
   {
-    "additions": 9,
-    "author": "remi-or",
+    "additions": 3,
+    "author": "tomaarsen",
     "author_association": "MEMBER",
-    "body_excerpt": "This is a draft / debug PR to check why the CI is red. No need to review.",
+    "body_excerpt": "# What does this PR do? Resolves https://github.com/huggingface/transformers/pull/42564#issuecomment-3874606093 #42564 updated `get_image_features` for Llama4, but it erroneously started using `pooler_output` instead of the previous `last_\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43562",
-    "created_at": "2026-01-28T15:12:31Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43562/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43562",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43882",
+    "created_at": "2026-02-10T08:38:51Z",
+    "deletions": 3,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43882/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43882",
     "labels": [],
-    "merged": false,
-    "number": 43562,
-    "review_comments_count": 0,
+    "merged": true,
+    "number": 43882,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[Debug] [Draft] Investigating red CI",
-    "updated_at": "2026-01-28T17:39:37Z"
+    "title": "[`fix`] Use `last_hidden_state` key from `get_image_features` for llama4",
+    "updated_at": "2026-02-10T15:05:50Z"
   },
   {
-    "additions": 0,
-    "author": "vkuzo",
+    "additions": 14,
+    "author": "jiqing-feng",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Summary: This is being deprecated: https://github.com/pytorch/ao/issues/3739 Test Plan: ```python RUN_SLOW=1 pytest tests/quantization/torchao_integration/test_torchao.py -s ``` # What does this PR do? <!-- Congratulations! You've made it\u2026",
+    "body_excerpt": "After the \"Refactor weight loading\" commit (#41580), UMT5EncoderModel fails to properly tie encoder.embed_tokens.weight to shared.weight when loading checkpoints with tie_word_embeddings=False (e.g., Wan-AI video generation models). This c\u2026",
     "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43561",
-    "created_at": "2026-01-28T15:04:44Z",
-    "deletions": 104,
+    "comments_count": 11,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43880",
+    "created_at": "2026-02-10T07:26:07Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43561/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43561",
+    "files_url": "https://github.com/huggingface/transformers/pull/43880/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43880",
     "labels": [],
     "merged": true,
-    "number": 43561,
-    "review_comments_count": 0,
+    "number": 43880,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "remove torchao.autoquant from transformers",
-    "updated_at": "2026-02-02T11:07:11Z"
+    "title": "Fix UMT5EncoderModel embedding weights not being tied after loading",
+    "updated_at": "2026-02-19T12:42:24Z"
   },
   {
-    "additions": 2,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "We switched from `requests`, with no timeout by default, to `httpx`, which does have a read timeout by default. This causes some timeout errors in the CI, so this PR increases the timeout length by following the snippet [here](https://gith\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 17,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43560",
-    "created_at": "2026-01-28T14:42:21Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43879",
+    "created_at": "2026-02-10T06:42:49Z",
+    "deletions": 17,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43560/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43560",
+    "files_url": "https://github.com/huggingface/transformers/pull/43879/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43879",
     "labels": [],
     "merged": true,
-    "number": 43560,
+    "number": 43879,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Increase timeout when preparing CI",
-    "updated_at": "2026-01-28T14:54:33Z"
+    "title": "Fix old tech stack in doc",
+    "updated_at": "2026-02-10T13:58:11Z"
   },
   {
-    "additions": 8,
-    "author": "tomaarsen",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? * Allow the encoder of T5Gemma2 to be loaded standalone ### Details This is valuable for Sentence Transformers, which may want to load the encoder only (see https://github.com/huggingface/sentence-transformers/pull/\u2026",
-    "changed_files": 6,
+    "additions": 39,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary Fixes #43874 by adding `get_number_of_image_patches` to `Glm46VImageProcessorFast`. `Glm46VProcessor._get_num_multimodal_tokens` calls this method on `self.image_processor`, which raises an `AttributeError` when the fast image p\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 13,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43559",
-    "created_at": "2026-01-28T14:41:34Z",
-    "deletions": 0,
+    "comments_count": 10,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43877",
+    "created_at": "2026-02-10T04:50:19Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43559/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43559",
+    "files_url": "https://github.com/huggingface/transformers/pull/43877/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43877",
     "labels": [],
     "merged": true,
-    "number": 43559,
-    "review_comments_count": 6,
-    "state": "closed",
-    "title": "[`feat`] Allow loading T5Gemma2Encoder with AutoModel",
-    "updated_at": "2026-02-03T22:21:11Z"
-  },
-  {
-    "additions": 9,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 1,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43558",
-    "created_at": "2026-01-28T14:08:17Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43558/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43558",
-    "labels": [],
-    "merged": false,
-    "number": 43558,
-    "review_comments_count": 0,
+    "number": 43877,
+    "review_comments_count": 5,
     "state": "closed",
-    "title": "check style bot",
-    "updated_at": "2026-01-30T14:38:00Z"
+    "title": "Fix missing fast image patch counter in Glm46V",
+    "updated_at": "2026-02-11T12:23:27Z"
   },
   {
-    "additions": 2,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This test was taking 3 to 4mn on the CI `test_voxtral_tokenizer_converts_from_tekken` https://github.com/huggingface/transformers/blob/main/tests/models/auto/test_tokenization_auto.py#L213 Turns out we have a super\u2026",
-    "changed_files": 1,
+    "additions": 17,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary This PR fixes #43864 by preserving the `GlmMoeDsaConfig` default `mlp_layer_types` from the modular source. `GlmMoeDsaConfig` should default to dense MLP for the first 3 layers and sparse afterward. During modular conversion, th\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43557",
-    "created_at": "2026-01-28T14:06:09Z",
-    "deletions": 1,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43876",
+    "created_at": "2026-02-10T04:28:23Z",
+    "deletions": 95,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43557/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43557",
+    "files_url": "https://github.com/huggingface/transformers/pull/43876/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43876",
     "labels": [],
     "merged": true,
-    "number": 43557,
-    "review_comments_count": 1,
+    "number": 43876,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "fix(converter): speed up `MistralConverter.extract_vocab_merges_from_model`",
-    "updated_at": "2026-01-29T09:15:11Z"
+    "title": "Fix GlmMoeDsaConfig default mlp_layer_types in modular conversion",
+    "updated_at": "2026-02-10T12:24:19Z"
   },
   {
-    "additions": 51,
-    "author": "vasqu",
-    "author_association": "MEMBER",
-    "body_excerpt": "As per title, it is an edge case for lasr only at the moment but #41212 will also need it. I think this is the easiest solution (and fastest) because there are several edge cases with additional bases.",
-    "changed_files": 12,
+    "additions": 31,
+    "author": "cyyever",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Its reset should clear ```_quantized_keys``` and ```_quantized_values ```.",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43556",
-    "created_at": "2026-01-28T13:57:52Z",
-    "deletions": 44,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43556/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43556",
-    "labels": [],
-    "merged": true,
-    "number": 43556,
-    "review_comments_count": 5,
-    "state": "closed",
-    "title": "[`Modular`] Allow to add new bases that are not present in the inherited class",
-    "updated_at": "2026-01-28T16:33:01Z"
-  },
-  {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "canonical",
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43555",
-    "created_at": "2026-01-28T13:55:17Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43875",
+    "created_at": "2026-02-10T02:56:22Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43555/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43555",
+    "files_url": "https://github.com/huggingface/transformers/pull/43875/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43875",
     "labels": [],
-    "merged": true,
-    "number": 43555,
+    "merged": false,
+    "number": 43875,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "check PR bot permission - part 3 (try content attribute)",
-    "updated_at": "2026-01-28T14:04:54Z"
+    "state": "open",
+    "title": "Improve handling of QuantizedLayer.reset",
+    "updated_at": "2026-02-11T00:46:54Z"
   },
   {
-    "additions": 1,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
+    "additions": 6,
+    "author": "Tanmaygangurde20",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? Fixes #43867 This PR fixes a `TypeError` in the `dot_natural_key` function in `src/transformers/core_model_loading.py` that occurs when sorting model state dictionary keys with mixed numeric and string structures. #\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43554",
-    "created_at": "2026-01-28T13:49:19Z",
-    "deletions": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43871",
+    "created_at": "2026-02-09T20:19:43Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43554/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43554",
-    "labels": [],
-    "merged": true,
-    "number": 43554,
+    "files_url": "https://github.com/huggingface/transformers/pull/43871/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43871",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43871,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "check PR bot permission - part 2 (style only)",
-    "updated_at": "2026-01-28T13:50:00Z"
+    "title": "Fix TypeError in dot_natural_key when sorting mixed structure keys",
+    "updated_at": "2026-02-10T12:45:05Z"
   },
   {
-    "additions": 18,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 1,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
+    "additions": 90,
+    "author": "daniel7an",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Adds `interpolate_pos_encoding` support to the **VitPoseBackbone** model, enabling pretrained checkpoints to be used on input images of different resolutions. This follows the same pattern established in other visi\u2026",
+    "changed_files": 2,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43553",
-    "created_at": "2026-01-28T13:41:21Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43870",
+    "created_at": "2026-02-09T19:20:34Z",
+    "deletions": 10,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43553/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43553",
-    "labels": [],
-    "merged": true,
-    "number": 43553,
+    "files_url": "https://github.com/huggingface/transformers/pull/43870/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43870",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43870,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "check PR bot permission - part 1",
-    "updated_at": "2026-01-28T13:50:48Z"
+    "title": "Add interpolate_pos_encoding to VitPoseBackbone",
+    "updated_at": "2026-02-10T12:48:09Z"
   },
   {
-    "additions": 173,
-    "author": "Abdennacer-Badaoui",
+    "additions": 8,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "**Summary** This PR improves GPU monitoring performance by switching from threading to multiprocessing and using the amdsmi Python library instead of rocm-smi subprocess calls for AMD GPUs. **Changes** - Threading \u2192 Multiprocessing: GPU mo\u2026",
-    "changed_files": 2,
+    "body_excerpt": "There were some leftover references to the TranslationPipeline, so this PR removes them. Includes the cleanup from #43826 as well as some other changes!",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43552",
-    "created_at": "2026-01-28T13:35:17Z",
-    "deletions": 72,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43869",
+    "created_at": "2026-02-09T18:35:27Z",
+    "deletions": 99,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43552/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43552",
+    "files_url": "https://github.com/huggingface/transformers/pull/43869/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43869",
     "labels": [],
     "merged": true,
-    "number": 43552,
-    "review_comments_count": 9,
+    "number": 43869,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "Improve GPU monitoring: switch to multiprocessing and use amdsmi for AMD GPUs",
-    "updated_at": "2026-01-29T09:12:50Z"
+    "title": "Remove remaining vestiges of the TranslationPipeline",
+    "updated_at": "2026-02-26T14:38:05Z"
   },
   {
-    "additions": 2,
-    "author": "harshaljanjani",
+    "additions": 1,
+    "author": "thecaptain789",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 Fix SDPA `torch.compile` failure in [Bamba-9B-v2](https://huggingface.co/ibm-ai-platform/Bamba-9B-v2). Fixes #43550. ### Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the oth\u2026",
-    "changed_files": 2,
+    "body_excerpt": "Fixes a simple typo in the timm backbones documentation. 'neccessary' \u2192 'necessary'",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43551",
-    "created_at": "2026-01-28T11:27:56Z",
-    "deletions": 2,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43868",
+    "created_at": "2026-02-09T17:58:02Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43551/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43551",
+    "files_url": "https://github.com/huggingface/transformers/pull/43868/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43868",
     "labels": [],
-    "merged": false,
-    "number": 43551,
+    "merged": true,
+    "number": 43868,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Bamba model fails with torch.compile when using SDPA",
-    "updated_at": "2026-02-10T03:57:47Z"
+    "title": "docs: correct typo 'neccessary' to 'necessary'",
+    "updated_at": "2026-02-09T18:18:58Z"
   },
   {
-    "additions": 15,
-    "author": "MekkCyber",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Before we were just silently skipping parameters that are passed by the user like `s_aux` in case they are not supported by the attention backend specified, it would be better to raise an exception instead. cc @dani\u2026",
+    "additions": 4,
+    "author": "Deep-unlearning",
+    "author_association": "MEMBER",
+    "body_excerpt": null,
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43549",
-    "created_at": "2026-01-28T10:41:55Z",
-    "deletions": 0,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43865",
+    "created_at": "2026-02-09T15:12:33Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43549/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43549",
+    "files_url": "https://github.com/huggingface/transformers/pull/43865/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43865",
     "labels": [],
-    "merged": false,
-    "number": 43549,
-    "review_comments_count": 3,
-    "state": "open",
-    "title": "[kernels] exception handling for fa kernels",
-    "updated_at": "2026-01-28T14:52:46Z"
+    "merged": true,
+    "number": 43865,
+    "review_comments_count": 17,
+    "state": "closed",
+    "title": "update somes audio models",
+    "updated_at": "2026-02-13T10:25:47Z"
   },
   {
-    "additions": 9,
-    "author": "tomaszcichy98",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## What does this PR do? Fixes a bug where `Qwen2VLImageProcessorFast` doesn't set `min_pixels` and `max_pixels` instance attributes, breaking compatibility with code that expects these attributes. ## The Problem The slow processor (`Qwen2\u2026",
-    "changed_files": 3,
+    "additions": 5,
+    "author": "eustlb",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? This allows to do something like: ```python from transformers import AutoProcessor from datasets import Audio, load_dataset model_id = \"openai/whisper-large-v3\" processor = AutoProcessor.from_pretrained(model_id) da\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43548",
-    "created_at": "2026-01-28T10:35:52Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43863",
+    "created_at": "2026-02-09T14:18:04Z",
+    "deletions": 16,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43548/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43548",
+    "files_url": "https://github.com/huggingface/transformers/pull/43863/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43863",
     "labels": [],
     "merged": false,
-    "number": 43548,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Qwen2VL] Fix missing min_pixels/max_pixels attributes in fast image processor",
-    "updated_at": "2026-01-29T20:12:47Z"
-  },
-  {
-    "additions": 14,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026gemmFp8Test::test_change_loading_attributes - quantization: @SunMarc @MekkCyber input_scale_ub is not correctly updated, since replace_with_fbgemm_fp8_linear is called under meta device.",
-    "changed_files": 3,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43547",
-    "created_at": "2026-01-28T08:37:08Z",
-    "deletions": 7,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43547/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43547",
-    "labels": [],
-    "merged": true,
-    "number": 43547,
+    "number": 43863,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "fix the error of tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py::Fb\u2026",
-    "updated_at": "2026-02-10T14:36:36Z"
+    "state": "open",
+    "title": "[whisper] allow to pass text/audio specific kwargs",
+    "updated_at": "2026-02-10T11:28:03Z"
   },
   {
-    "additions": 0,
-    "author": "jiqing-feng",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "Script: ```python import torch from transformers import pipeline from datasets import load_dataset device = \"cuda:0\" if torch.cuda.is_available() else \"cpu\" pipe = pipeline( \"automatic-speech-recognition\", model=\"openai/whisper-small\", dev\u2026",
-    "changed_files": 1,
+    "additions": 1642,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title. `check_model_inputs` is becoming very complex and doing more than what it should. Let's separate into 2 clear decorators: - `capture_outputs`: everything related to capture outputs - `merge_with_co\u2026",
+    "changed_files": 261,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43546",
-    "created_at": "2026-01-28T07:44:11Z",
-    "deletions": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43862",
+    "created_at": "2026-02-09T13:37:05Z",
+    "deletions": 996,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43546/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43546",
+    "files_url": "https://github.com/huggingface/transformers/pull/43862/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43862",
     "labels": [],
     "merged": true,
-    "number": 43546,
-    "review_comments_count": 5,
+    "number": 43862,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Remove `num_frames` in ASR pipeline",
-    "updated_at": "2026-02-03T09:48:22Z"
+    "title": "Separate `check_model_inputs` into `capture_outputs` and `merge_with_config_defaults` + ensure correctness",
+    "updated_at": "2026-02-10T17:37:46Z"
   },
   {
-    "additions": 5,
-    "author": "sywangyi",
+    "additions": 2,
+    "author": "thecaptain789",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "- quantization: @SunMarc @MekkCyber should we remove such test. or else will throw error FAILED tests/quantization/fbgemm_fp8/test_fbgemm_fp8.py::FbgemmFp8LinearTest::test_linear_preserves_shape - NameError: name 'quantize_fp8_per_row' is\u2026",
+    "body_excerpt": "Fixes #43813 Corrects the typo in `src/transformers/integrations/peft.py` on lines 264 and 303 where `quantizatin_operations` should be `quantization_operations`.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43545",
-    "created_at": "2026-01-28T06:40:55Z",
-    "deletions": 36,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43861",
+    "created_at": "2026-02-09T11:50:51Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43545/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43545",
+    "files_url": "https://github.com/huggingface/transformers/pull/43861/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43861",
     "labels": [],
-    "merged": true,
-    "number": 43545,
-    "review_comments_count": 1,
+    "merged": false,
+    "number": 43861,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "remove FbgemmFp8LinearTest",
-    "updated_at": "2026-01-29T12:37:16Z"
+    "title": "fix: correct typo 'quantizatin_operations' to 'quantization_operations'",
+    "updated_at": "2026-02-25T13:25:05Z"
   },
   {
     "additions": 1,
-    "author": "Olexandr88",
+    "author": "pavel-esir",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": null,
+    "body_excerpt": "# What does this PR do? When tokenizer is red from `tokenizer.model` then model type whether it's `Unigram` or `BPE` is done incorrectly. Correct Unigram model type is 1 not 2 according to `SentencePiece` src https://github.com/google/sent\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43544",
-    "created_at": "2026-01-28T06:28:03Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43860",
+    "created_at": "2026-02-09T11:45:22Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43544/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43544",
+    "files_url": "https://github.com/huggingface/transformers/pull/43860/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43860",
     "labels": [],
     "merged": true,
-    "number": 43544,
+    "number": 43860,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix typos in add_new_model_like docstrings",
-    "updated_at": "2026-03-11T13:58:05Z"
+    "title": "Update ModelType for Unigram tokenizer",
+    "updated_at": "2026-02-09T15:13:45Z"
   },
   {
-    "additions": 22,
-    "author": "ITcarrot",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 22,
+    "additions": 1719,
+    "author": "Cyrilvallez",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? As per the title!",
+    "changed_files": 12,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43543",
-    "created_at": "2026-01-28T04:38:01Z",
-    "deletions": 22,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43858",
+    "created_at": "2026-02-09T10:28:19Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43543/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43543",
-    "labels": [],
-    "merged": false,
-    "number": 43543,
+    "files_url": "https://github.com/huggingface/transformers/pull/43858/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43858",
+    "labels": [
+      "New model"
+    ],
+    "merged": true,
+    "number": 43858,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Fix fp16 underflow in MoE load balancing loss by enforcing fp32 softmax",
-    "updated_at": "2026-01-28T04:39:00Z"
+    "state": "closed",
+    "title": "Add GlmMoeDsa",
+    "updated_at": "2026-02-21T10:38:46Z"
   },
   {
-    "additions": 16,
-    "author": "ITcarrot",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? This PR fixes a bug in the router implementation of several MoE models (Qwen Moe like models, `Olmoe`, `FlexOlmo`). Previously, the raw `router_logits` were being overwritten by the result of the softmax operation:\u2026",
-    "changed_files": 7,
+    "additions": 348,
+    "author": "ArthurZucker",
+    "author_association": "MEMBER",
+    "body_excerpt": "# What does this PR do? Should just be a way to skip remote code if we want to",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43542",
-    "created_at": "2026-01-28T04:07:48Z",
-    "deletions": 16,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43857",
+    "created_at": "2026-02-09T10:13:39Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43542/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43542",
+    "files_url": "https://github.com/huggingface/transformers/pull/43857/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43857",
     "labels": [],
     "merged": false,
-    "number": 43542,
+    "number": 43857,
     "review_comments_count": 0,
     "state": "open",
-    "title": "fix: output router capture wrong router logits in qwen moe models",
-    "updated_at": "2026-01-28T10:25:19Z"
-  },
-  {
-    "additions": 4,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "Llama4 look for `pad_token_id` on `self.config` in some cases, but I think it actually lives on `self.config.text_config`. This PR should fix things! There was a similar issue with Qwen3, but thankfully I couldn't find any other affected m\u2026",
-    "changed_files": 1,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43539",
-    "created_at": "2026-01-27T18:13:11Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43539/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43539",
-    "labels": [],
-    "merged": true,
-    "number": 43539,
-    "review_comments_count": 1,
-    "state": "closed",
-    "title": "Look for the pad_token_id in the right place for Llama4",
-    "updated_at": "2026-02-09T17:24:21Z"
+    "title": "Allow to bypass remote code if we want to try and convert it",
+    "updated_at": "2026-02-26T15:19:02Z"
   },
   {
-    "additions": 2446,
-    "author": "lashahub",
+    "additions": 0,
+    "author": "kaixuanliu",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR adds support for **Music Flamingo**, NVIDIA's open large audio-language model designed for deep music understanding and reasoning. - **Paper**: [Music Flamingo: Scaling Music Understanding in Audio Language Models](https://huggingf\u2026",
-    "changed_files": 28,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 27,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43538",
-    "created_at": "2026-01-27T17:37:34Z",
-    "deletions": 73,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43538/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43538",
-    "labels": [
-      "New model",
-      "Audio"
-    ],
-    "merged": false,
-    "number": 43538,
-    "review_comments_count": 101,
-    "state": "open",
-    "title": "Add Music Flamingo",
-    "updated_at": "2026-03-25T17:19:57Z"
-  },
-  {
-    "additions": 125,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "We get flaky generation tests in the CI a lot. A very common cause is the `has_similar_generate_outputs` helper function - this function is called by many tests, but it has very narrow tolerances by default in the CI (`atol=1e-5, rtol=1e-5\u2026",
-    "changed_files": 12,
+    "body_excerpt": "When all routing weights are zero, in this line [out_per_sample = out_per_sample * sample_weights.unsqueeze(-1)](https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/moe.py#L153) , it will also output all zer\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43537",
-    "created_at": "2026-01-27T17:34:58Z",
-    "deletions": 74,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43855",
+    "created_at": "2026-02-09T08:40:45Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43537/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43537",
+    "files_url": "https://github.com/huggingface/transformers/pull/43855/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43855",
     "labels": [],
     "merged": true,
-    "number": 43537,
-    "review_comments_count": 5,
+    "number": 43855,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Operation Green CI II",
-    "updated_at": "2026-01-29T18:49:23Z"
+    "title": "delete unnecessary code to make moe compatible to full graph compile",
+    "updated_at": "2026-02-09T12:16:03Z"
   },
   {
-    "additions": 73,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? The dependencies and extras have almost never been touched and are quite old. We are suffering quite a bit from the following: - duplicated dependencies (e.g. being still in extra etc even if they are main dependenc\u2026",
-    "changed_files": 18,
+    "additions": 1,
+    "author": "sywangyi",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "- quantization: @SunMarc @MekkCyber ``` [rank0]: During handling of the above exception, another exception occurred: [rank0]: Traceback (most recent call last): [rank0]: File \"/mnt/disk3/wangyi/transformers/test_tp.py\", line 24, in <module\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43536",
-    "created_at": "2026-01-27T17:15:28Z",
-    "deletions": 3600,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43853",
+    "created_at": "2026-02-09T06:39:17Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43536/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43536",
+    "files_url": "https://github.com/huggingface/transformers/pull/43853/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43853",
     "labels": [],
     "merged": true,
-    "number": 43536,
-    "review_comments_count": 2,
+    "number": 43853,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Rework dependencies and extras + Remove outdated `templates` folder",
-    "updated_at": "2026-01-28T17:46:10Z"
+    "title": "fix gptoss crash in tp",
+    "updated_at": "2026-02-09T10:42:33Z"
   },
   {
-    "additions": 2,
-    "author": "Rocketknight1",
+    "additions": 41,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "#43530 fails because the test I wrote for verifying correct downloads fails on image files, which might have UTF-8 illegal bytes. Opening the file in `b` mode fixes it. This doesn't show up in the CI because the script is run when recreati\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Currently, if slack reporting (re-usable) workflow or `check new failure` workflow fails, the CI triggered via a comment in a pull request will send a comment back to the PR page with \u2705 No failing test specific to t\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43535",
-    "created_at": "2026-01-27T17:09:31Z",
+    "comments_count": 15,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43852",
+    "created_at": "2026-02-09T05:43:44Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43535/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43535",
+    "files_url": "https://github.com/huggingface/transformers/pull/43852/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43852",
     "labels": [],
     "merged": true,
-    "number": 43535,
+    "number": 43852,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix for #43530",
-    "updated_at": "2026-01-27T17:21:28Z"
+    "title": "don't merge check",
+    "updated_at": "2026-02-10T09:45:42Z"
   },
   {
-    "additions": 17,
-    "author": "Abdennacer-Badaoui",
+    "additions": 1,
+    "author": "ydshieh",
     "author_association": "MEMBER",
-    "body_excerpt": "GPU monitoring causes ~2-3x slowdown on AMD GPUs during benchmark runs. This PR automatically disables GPU monitoring when an AMD GPU is detected, with a warning message explaining the reason. Users can still force-enable GPU monitoring wi\u2026",
+    "body_excerpt": "# What does this PR do?",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43534",
-    "created_at": "2026-01-27T16:51:26Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43534/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43534",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43851",
+    "created_at": "2026-02-09T03:29:35Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43851/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43851",
     "labels": [],
-    "merged": false,
-    "number": 43534,
+    "merged": true,
+    "number": 43851,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Benchmark] Disable GPU monitoring by default on AMD GPUs",
-    "updated_at": "2026-01-29T09:54:57Z"
+    "title": "Fix slack-report workflow file",
+    "updated_at": "2026-02-09T05:41:32Z"
   },
   {
-    "additions": 6,
-    "author": "Codalorian",
-    "author_association": "NONE",
-    "body_excerpt": "Phi functionality working very well. Llama models would probably be the next step to take.",
-    "changed_files": 3,
+    "additions": 2,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - remove `pipeline()`-based inference examples from the summarization and translation task docs - keep only direct `AutoTokenizer` + `AutoModelForSeq2SeqLM.generate` examples, which match v5 behavior ## Validation - `grep -R --l\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43533",
-    "created_at": "2026-01-27T16:51:08Z",
-    "deletions": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43850",
+    "created_at": "2026-02-09T02:34:44Z",
+    "deletions": 25,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43533/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43533",
+    "files_url": "https://github.com/huggingface/transformers/pull/43850/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43850",
     "labels": [],
     "merged": false,
-    "number": 43533,
+    "number": 43850,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Add LBNet functionality to Llama models next",
-    "updated_at": "2026-01-27T16:52:10Z"
-  },
-  {
-    "additions": 88,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 6,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43532",
-    "created_at": "2026-01-27T16:32:01Z",
-    "deletions": 31,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43532/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43532",
-    "labels": [],
-    "merged": false,
-    "number": 43532,
-    "review_comments_count": 2,
-    "state": "open",
-    "title": "[don't merge] Show diff",
-    "updated_at": "2026-01-28T08:36:34Z"
+    "title": "Remove stale pipeline inference examples from v5 task docs",
+    "updated_at": "2026-02-09T12:24:03Z"
   },
   {
-    "additions": 89,
-    "author": "Rocketknight1",
-    "author_association": "MEMBER",
-    "body_excerpt": "This is the PR where I just keep patching things until it's green and then I merge",
-    "changed_files": 3,
+    "additions": 42,
+    "author": "weiguangli-io",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## Summary - only treat `translation_XX_to_YY` as a special task when the `translation` pipeline is actually registered - stop advertising `translation_XX_to_YY` in unknown-task error messages when translation is not supported - add regres\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43530",
-    "created_at": "2026-01-27T14:27:58Z",
-    "deletions": 17,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43849",
+    "created_at": "2026-02-09T02:31:22Z",
+    "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43530/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43530",
+    "files_url": "https://github.com/huggingface/transformers/pull/43849/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43849",
     "labels": [],
-    "merged": true,
-    "number": 43530,
-    "review_comments_count": 14,
+    "merged": false,
+    "number": 43849,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Operation Green CI",
-    "updated_at": "2026-01-27T16:50:42Z"
+    "title": "Fix translation task validation when translation pipeline is unavailable",
+    "updated_at": "2026-03-03T09:42:38Z"
   },
   {
-    "additions": 1,
-    "author": "MekkCyber",
+    "additions": 70,
+    "author": "weiguangli-io",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? We are standardizing kernel names in `kernels-community` to use `-` instead of `_`, this pr simply updates `cv_utils` new kernel is here with the latest torch version 2.10: https://huggingface.co/kernels-community/c\u2026",
+    "body_excerpt": "## Summary Fixes #43698 by adding SwanLab resume init argument support in the Trainer integration. ## What changed - `SwanLabCallback.setup()` now forwards two optional env-based init args to `swanlab.init(...)`: - `SWANLAB_RUN_ID` -> `id`\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43529",
-    "created_at": "2026-01-27T13:56:00Z",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43848",
+    "created_at": "2026-02-09T02:10:15Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43529/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43529",
+    "files_url": "https://github.com/huggingface/transformers/pull/43848/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43848",
     "labels": [],
     "merged": true,
-    "number": 43529,
+    "number": 43848,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[kernels] Update cv_utils name",
-    "updated_at": "2026-01-27T15:33:10Z"
+    "title": "Fix SwanLab callback to forward resume init args",
+    "updated_at": "2026-02-10T12:57:08Z"
   },
   {
-    "additions": 2,
-    "author": "MekkCyber",
+    "additions": 39,
+    "author": "tohtana",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? change `triton_kernels` name to `gpt-oss-triton-kernels`, no new failling tests related to this change",
-    "changed_files": 2,
+    "body_excerpt": "# What does this PR do? When using `HfDeepSpeedConfig` + ZeRO-3 with `from_config()`, the model's custom weight initialization (`_init_weights`) is silently skipped because parameters are already partitioned by `deepspeed.zero.Init`. The m\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43528",
-    "created_at": "2026-01-27T13:44:57Z",
-    "deletions": 2,
+    "comments_count": 7,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43847",
+    "created_at": "2026-02-08T22:04:44Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43528/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43528",
+    "files_url": "https://github.com/huggingface/transformers/pull/43847/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43847",
     "labels": [],
-    "merged": true,
-    "number": 43528,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 43847,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[Quantization] Fix triton_kernels name after being renamed to gpt-oss-triton-kernels",
-    "updated_at": "2026-01-28T10:45:35Z"
+    "title": " Fix _from_config silently skipping weight initialization under DeepSpeed ZeRO-3",
+    "updated_at": "2026-02-27T18:50:02Z"
   },
   {
-    "additions": 30,
-    "author": "sbucaille",
+    "additions": 108,
+    "author": "vasanthrpjan1-boop",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes #43526 ## Who can review? @yonigozlan @molbap",
-    "changed_files": 10,
+    "body_excerpt": "Add support for logging training metrics to the Pluto experiment tracker (https://pluto.trainy.ai/) as a native Trainer callback. This provides an alternative for users migrating from Neptune given its upcoming shutdown. Closes #43724 # Wh\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43527",
-    "created_at": "2026-01-27T13:29:06Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43843",
+    "created_at": "2026-02-08T14:29:51Z",
     "deletions": 5,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43527/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43527",
+    "files_url": "https://github.com/huggingface/transformers/pull/43843/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43843",
     "labels": [],
-    "merged": true,
-    "number": 43527,
+    "merged": false,
+    "number": 43843,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: return labels instead of label in reduce_label method in BeitImageProcessorFast",
-    "updated_at": "2026-01-28T01:05:25Z"
-  },
-  {
-    "additions": 25,
-    "author": "kashif",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fixed the `_apply_weight_conversions_to_state_dict` function to properly handle MoE weight conversions when the renamed key exists in model_state_dict. The bug was that when renamed_key (e.g., `gate_up_proj`) was fo\u2026",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43524",
-    "created_at": "2026-01-27T11:29:22Z",
-    "deletions": 11,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43524/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43524",
-    "labels": [],
-    "merged": true,
-    "number": 43524,
-    "review_comments_count": 2,
-    "state": "closed",
-    "title": "[DeepSpeed] properly handle MoE weight conversion",
-    "updated_at": "2026-02-10T19:24:06Z"
+    "title": "Add PlutoCallback integration for Pluto experiment tracker",
+    "updated_at": "2026-02-08T14:30:36Z"
   },
   {
-    "additions": 52,
-    "author": "Cyrilvallez",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Fix https://github.com/huggingface/transformers/issues/43522. TLDR we want to skip tying when inside `from_pretrained` (so we add the context manager), but always tie when initializing from config (even with meta de\u2026",
-    "changed_files": 3,
+    "additions": 3,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `NameError: name 'TypeAdapter' is not defined` error when importing transformers without pydantic installed. ## Problem The `TypeAdapter` class from pydantic was used as a type annotation in `_validate_requ\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43523",
-    "created_at": "2026-01-27T10:53:08Z",
-    "deletions": 4,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43842",
+    "created_at": "2026-02-08T13:01:19Z",
+    "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43523/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43523",
+    "files_url": "https://github.com/huggingface/transformers/pull/43842/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43842",
     "labels": [],
-    "merged": true,
-    "number": 43523,
+    "merged": false,
+    "number": 43842,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Tie the weights even if initializing from a config on meta device",
-    "updated_at": "2026-01-27T15:45:24Z"
+    "state": "open",
+    "title": "fix(cli): Fix TypeAdapter NameError when pydantic is not installed",
+    "updated_at": "2026-02-08T16:08:04Z"
   },
   {
-    "additions": 228,
-    "author": "remi-or",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Summary This PR introduces minor changes to the continuous batching feature: #### Performance - optimize the `get_seqlens_k` function to avoid looping over all cache managers - logits indexing is now done inside the cuda graph, as there\u2026",
-    "changed_files": 8,
+    "additions": 7,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a bug where `SwitchTransformersConfig` incorrectly creates sparse layers when `num_sparse_encoder_layers=0` or `num_sparse_decoder_layers=0` is set with a single-layer model. ## Problem When `num_sparse_encod\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43521",
-    "created_at": "2026-01-27T10:07:50Z",
-    "deletions": 169,
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43841",
+    "created_at": "2026-02-08T12:58:27Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43521/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43521",
+    "files_url": "https://github.com/huggingface/transformers/pull/43841/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43841",
     "labels": [],
-    "merged": true,
-    "number": 43521,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 43841,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "[CB] Minor perf improvements and ty compatibility",
-    "updated_at": "2026-01-28T10:39:49Z"
+    "title": "fix(switch_transformers): Fix sparse layer creation when num_sparse_*_layers=0",
+    "updated_at": "2026-02-09T13:00:01Z"
   },
   {
-    "additions": 19,
-    "author": "tarekziade",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Commit 8dd9c999a6262d6ceb48f4a2da7acaccfa80e3bc introduced a regression by unconditionally reinitializing BatchNorm2d buffers (running_mean, running_var, num_batches_tracked) in the _init_weights() method. The probl\u2026",
+    "additions": 2,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes incorrect timestamp calculation in Qwen3VL Processor by using `temporal_patch_size` instead of `merge_size`. ## Problem The `_calculate_timestamps()` method was being called with `self.video_processor.merge_s\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43520",
-    "created_at": "2026-01-27T08:51:41Z",
-    "deletions": 8,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43840",
+    "created_at": "2026-02-08T12:55:28Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43520/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43520",
+    "files_url": "https://github.com/huggingface/transformers/pull/43840/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43840",
     "labels": [],
-    "merged": true,
-    "number": 43520,
-    "review_comments_count": 2,
+    "merged": false,
+    "number": 43840,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: initialize BatchNorm2d buffers only on meta",
-    "updated_at": "2026-01-27T13:28:29Z"
+    "title": "fix(qwen3_vl): Use temporal_patch_size for timestamp calculation",
+    "updated_at": "2026-02-09T08:56:08Z"
   },
   {
-    "additions": 6,
-    "author": "ariG23498",
-    "author_association": "MEMBER",
-    "body_excerpt": "CC: @MekkCyber",
+    "additions": 9,
+    "author": "Mr-Neutr0n",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "## What does this PR do? Fixes a `RuntimeError: expected mat1 and mat2 to have the same dtype` error when using `torch.autocast` with MoE models like `microsoft/Phi-tiny-MoE-instruct`. ## Problem `torch._grouped_mm` is not autocast-enabled\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43518",
-    "created_at": "2026-01-27T07:23:19Z",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43839",
+    "created_at": "2026-02-08T12:21:19Z",
     "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43518/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43518",
+    "files_url": "https://github.com/huggingface/transformers/pull/43839/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43839",
     "labels": [],
     "merged": true,
-    "number": 43518,
+    "number": 43839,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "[Kernels] kernel migration updates for activation kernels",
-    "updated_at": "2026-02-04T08:48:42Z"
+    "title": "fix(moe): Handle dtype mismatch in torch._grouped_mm with autocast",
+    "updated_at": "2026-02-11T14:58:48Z"
   },
   {
-    "additions": 31,
-    "author": "jp1924",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? When using that sp(sequence parallel) in evaluate, the sequence length must be divisible by `world_size`, and the eval batch size must be the same as the train batch size to be able to use it, and this is a PR to mo\u2026",
-    "changed_files": 2,
+    "additions": 1556,
+    "author": "mbtariq82",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? This PR adds Qwen3-ASR to the Transformers library. Fixes #43837 ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [x] Did you read the [co\u2026",
+    "changed_files": 19,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43517",
-    "created_at": "2026-01-27T07:17:42Z",
-    "deletions": 1,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43838",
+    "created_at": "2026-02-08T12:05:43Z",
+    "deletions": 58,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43517/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43517",
-    "labels": [],
-    "merged": true,
-    "number": 43517,
-    "review_comments_count": 4,
-    "state": "closed",
-    "title": "Make it possible to evaluate when using sequence parallel in HF Trainer",
-    "updated_at": "2026-02-20T07:32:13Z"
+    "files_url": "https://github.com/huggingface/transformers/pull/43838/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43838",
+    "labels": [
+      "New model",
+      "Audio"
+    ],
+    "merged": false,
+    "number": 43838,
+    "review_comments_count": 30,
+    "state": "open",
+    "title": "Proposal to add Qwen3-ASR support [WIP]",
+    "updated_at": "2026-03-31T18:04:03Z"
   },
   {
-    "additions": 18,
-    "author": "yousheng-chen",
-    "author_association": "NONE",
-    "body_excerpt": null,
-    "changed_files": 4,
+    "additions": 79,
+    "author": "pragnyanramtha",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43824 what i think happened in #43824 is that waltwalt36 did not install the optional dependencies like pydantic, causing this issue. According to the core architecture docs, transformers implements a lazy loading mechanism for impo\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43516",
-    "created_at": "2026-01-27T04:13:32Z",
-    "deletions": 0,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43836",
+    "created_at": "2026-02-08T11:28:31Z",
+    "deletions": 70,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43516/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43516",
+    "files_url": "https://github.com/huggingface/transformers/pull/43836/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43836",
     "labels": [],
     "merged": false,
-    "number": 43516,
-    "review_comments_count": 0,
-    "state": "closed",
-    "title": "Hub creat branch",
-    "updated_at": "2026-01-27T04:13:52Z"
+    "number": 43836,
+    "review_comments_count": 2,
+    "state": "open",
+    "title": "fix: wrapped TypeAdpater in string literals (for now)",
+    "updated_at": "2026-02-17T04:46:27Z"
   },
   {
-    "additions": 17,
-    "author": "yousheng-chen",
-    "author_association": "NONE",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "additions": 5,
+    "author": "nulone",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43828 ## What does this PR do? `torch._grouped_mm` is not registered for autocast. Under `torch.autocast`, LayerNorm outputs float32 while model weights stay bfloat16, causing RuntimeError: \"expected mat1 and mat2 to have same dtype\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43515",
-    "created_at": "2026-01-27T03:43:11Z",
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43833",
+    "created_at": "2026-02-08T07:26:06Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43515/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43515",
+    "files_url": "https://github.com/huggingface/transformers/pull/43833/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43833",
     "labels": [],
     "merged": false,
-    "number": 43515,
+    "number": 43833,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "New branch",
-    "updated_at": "2026-01-27T06:58:27Z"
+    "state": "open",
+    "title": "fix: ensure dtype consistency in grouped_mm under autocast",
+    "updated_at": "2026-02-11T02:28:43Z"
   },
   {
-    "additions": 33586,
-    "author": "yonigozlan",
-    "author_association": "MEMBER",
-    "body_excerpt": "# Image Processor Backend Refactor ## Summary Replaces the dual-file `BaseImageProcessor` (slow/PIL) + `BaseImageProcessorFast` (fast/torchvision) design with a unified backend architecture. The `image_processing_utils_fast` module is remo\u2026",
-    "changed_files": 675,
+    "additions": 0,
+    "author": "nulone",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "Fixes #43827 ## What does this PR do? Removes deprecated `pipeline()` examples from summarization.md and translation.md that reference pre-v5 API. The manual `model.generate()` approach is preserved. ## Before submitting - [x] This PR fixe\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43514",
-    "created_at": "2026-01-27T03:32:12Z",
-    "deletions": 57296,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43832",
+    "created_at": "2026-02-08T07:06:47Z",
+    "deletions": 27,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43514/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43514",
+    "files_url": "https://github.com/huggingface/transformers/pull/43832/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43832",
     "labels": [],
-    "merged": true,
-    "number": 43514,
-    "review_comments_count": 62,
+    "merged": false,
+    "number": 43832,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "\ud83d\udea8\ud83d\udea8 Refactor Image Processors to support different backends",
-    "updated_at": "2026-03-19T14:47:57Z"
+    "title": "docs: remove deprecated pipeline examples from summarization and tran\u2026",
+    "updated_at": "2026-02-08T07:19:52Z"
   },
   {
-    "additions": 36,
-    "author": "heathdutton",
+    "additions": 0,
+    "author": "Mr-Neutr0n",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "For DETR-derived models that use sigmoid/focal loss (Deformable DETR, Grounding DINO, LW-DETR, RT-DETR, D-FINE), the cardinality error calculation was incorrect. These models don't have an explicit background class, so checking `argmax(-1)\u2026",
-    "changed_files": 4,
+    "body_excerpt": "## Summary - Removes `pipeline()`-based inference examples from summarization and translation task documentation - These examples no longer work in v5 since `SummarizationPipeline` and `TranslationPipeline` were removed ## Background Accor\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43513",
-    "created_at": "2026-01-27T02:30:53Z",
-    "deletions": 5,
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43831",
+    "created_at": "2026-02-08T06:39:23Z",
+    "deletions": 27,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43513/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43513",
+    "files_url": "https://github.com/huggingface/transformers/pull/43831/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43831",
     "labels": [],
     "merged": true,
-    "number": 43513,
+    "number": 43831,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix cardinality error for DETR models without explicit background class",
-    "updated_at": "2026-02-09T17:30:43Z"
+    "title": "[docs] Remove pipeline() examples from summarization/translation tasks",
+    "updated_at": "2026-02-09T12:33:04Z"
   },
   {
-    "additions": 83,
-    "author": "jiqing-feng",
+    "additions": 7792,
+    "author": "bozheng-hit",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "CPU mxfp4 moe kernel already implemented. Fix the check to enable mxfp4 on CPU. Waiting for CPU kernel merge and release.",
-    "changed_files": 5,
+    "body_excerpt": "This PR adds the support of codes for the upcoming Qwen3.5 series models. For information about Qwen, please visit: \ud83d\udc49https://qwen.ai Special thanks to @JJJYmmm for helping complete the code in this PR. We also appreciate the valuable feedb\u2026",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 8,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43512",
-    "created_at": "2026-01-27T02:14:48Z",
-    "deletions": 42,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43830",
+    "created_at": "2026-02-08T05:51:57Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43512/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43512",
-    "labels": [],
+    "files_url": "https://github.com/huggingface/transformers/pull/43830/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43830",
+    "labels": [
+      "New model"
+    ],
     "merged": true,
-    "number": 43512,
-    "review_comments_count": 6,
+    "number": 43830,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Enable mxfp4 model on CPU",
-    "updated_at": "2026-02-24T15:22:21Z"
+    "title": "Adding Support for Qwen3.5",
+    "updated_at": "2026-03-03T02:26:31Z"
   },
   {
-    "additions": 1,
-    "author": "sywangyi",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "\u2026onTest::test_inference_mask_generation_batched_points_batched_images pass in xpu @ydshieh",
+    "additions": 30,
+    "author": "jayzuccarelli",
+    "author_association": "NONE",
+    "body_excerpt": "Fixes #43805 Follow-up to #43794: add a pytest fixture that sets a fixed seed (42) before each test so we always get the same RNG state in model tests and improve determinism. - **`tests/conftest.py`** (new): `set_seed` fixture with `autou\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43511",
-    "created_at": "2026-01-27T01:27:43Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43829",
+    "created_at": "2026-02-08T05:10:32Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43511/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43511",
-    "labels": [],
-    "merged": true,
-    "number": 43511,
+    "files_url": "https://github.com/huggingface/transformers/pull/43829/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43829",
+    "labels": [
+      "Code agent slop"
+    ],
+    "merged": false,
+    "number": 43829,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Update `SamHQModelIntegrationTest::test_inference_mask_generation_batched_points_batched_images` for `XPU`",
-    "updated_at": "2026-01-27T09:09:48Z"
+    "title": "chore(tests): add set_seed pytest fixture for determinism",
+    "updated_at": "2026-02-10T01:55:12Z"
   },
   {
-    "additions": 18,
-    "author": "pstjohn",
+    "additions": 2,
+    "author": "math-hiyoko",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "This PR fixes and re-enables the skipped `extra_state` tests, since we use this functionality when embedding NVIDIA TransformerEngine layers in PreTrainedModels. Thanks! @ArthurZucker for review",
-    "changed_files": 2,
-    "cluster_id": null,
-    "cluster_ids": [],
-    "cluster_role": null,
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43510",
-    "created_at": "2026-01-26T23:57:17Z",
-    "deletions": 5,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43510/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43510",
-    "labels": [],
-    "merged": true,
-    "number": 43510,
-    "review_comments_count": 3,
-    "state": "closed",
-    "title": "Fix and re-enable extra_state tests",
-    "updated_at": "2026-03-09T12:05:30Z"
-  },
-  {
-    "additions": 9,
-    "author": "oliverholworthy",
-    "author_association": "NONE",
-    "body_excerpt": "This PR restores compatibility with PyTorch versions < 2.4 while preserving the current behavior on newer versions. `torch.is_autocast_enabled(device_type)` was introduced in PyTorch 2.4, but Transformers currently supports torch>=2.2. On\u2026",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43509",
-    "created_at": "2026-01-26T21:45:32Z",
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43826",
+    "created_at": "2026-02-08T01:49:06Z",
     "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43509/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43509",
+    "files_url": "https://github.com/huggingface/transformers/pull/43826/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43826",
     "labels": [],
     "merged": false,
-    "number": 43509,
+    "number": 43826,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Guard torch.is_autocast_enabled(device_type) for torch<2.4",
-    "updated_at": "2026-01-30T13:25:33Z"
+    "state": "open",
+    "title": "fix: error message of pipeline",
+    "updated_at": "2026-02-09T13:26:30Z"
   },
   {
-    "additions": 30,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? PR #42845 fails some workflow runs ``` File \"/home/runner/work/transformers/transformers/utils/get_ci_error_statistics.py\", line 29, in get_jobs result = httpx.get(url + f\"&page={i + 2}\", headers=headers).json() Fil\u2026",
-    "changed_files": 6,
+    "additions": 2045,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43507",
-    "created_at": "2026-01-26T20:01:30Z",
-    "deletions": 30,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43823",
+    "created_at": "2026-02-07T20:24:42Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43507/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43507",
+    "files_url": "https://github.com/huggingface/transformers/pull/43823/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43823",
     "labels": [],
-    "merged": true,
-    "number": 43507,
+    "merged": false,
+    "number": 43823,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "Revert utils files changes from db1e6f1e",
-    "updated_at": "2026-01-26T20:50:06Z"
+    "state": "open",
+    "title": "Add `facebook/MobileLLM-125M`",
+    "updated_at": "2026-02-09T11:48:23Z"
   },
   {
-    "additions": 1261,
-    "author": "reach-Harishapc",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "- Implement RishAIModel, RishAICausalLM with proper inheritance - Add RishAIConfig with full MoE and attention parameters - Integrate RishAITokenizer with BPE support - 100% test coverage with comprehensive test suite - Compatible with tra\u2026",
-    "changed_files": 8,
+    "additions": 15,
+    "author": "veeceey",
+    "author_association": "NONE",
+    "body_excerpt": "## What does this PR do? Fixes #43784 When PyTorch < 2.4 is installed, transformers v5.x disables PyTorch by making `is_torch_available()` return `False`. This causes the conditional import of `torch.nn as nn` (line 42) to be skipped. Howe\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43506",
-    "created_at": "2026-01-26T19:38:43Z",
-    "deletions": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43822",
+    "created_at": "2026-02-07T19:20:43Z",
+    "deletions": 13,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43506/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43506",
+    "files_url": "https://github.com/huggingface/transformers/pull/43822/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43822",
     "labels": [],
     "merged": false,
-    "number": 43506,
+    "number": 43822,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Add RishAI model with full transformers integration",
-    "updated_at": "2026-01-27T19:52:25Z"
+    "state": "closed",
+    "title": "Fix NameError: name 'nn' is not defined when PyTorch < 2.4",
+    "updated_at": "2026-02-09T12:53:52Z"
   },
   {
-    "additions": 4,
-    "author": "harshaljanjani",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "### What does this PR do? \u2192 Add backward compat for legacy `segmentation_indices` field in `BeitConfig`. \u2192 Fix the conversion script to set `out_indices` for base models; should the model checkpoints be re-exported after this PR. \u2192 Improve\u2026",
-    "changed_files": 2,
+    "additions": 2,
+    "author": "redpanda1995",
+    "author_association": "FIRST_TIME_CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43505",
-    "created_at": "2026-01-26T18:34:05Z",
-    "deletions": 1,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43821",
+    "created_at": "2026-02-07T19:04:11Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43505/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43505",
+    "files_url": "https://github.com/huggingface/transformers/pull/43821/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43821",
     "labels": [],
     "merged": true,
-    "number": 43505,
-    "review_comments_count": 2,
+    "number": 43821,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(models): Migrate legacy segmentation_indices to out_indices in BeitConfig",
-    "updated_at": "2026-02-23T10:08:28Z"
+    "title": "Fix typo in quantization_operations in PEFT integrations",
+    "updated_at": "2026-02-16T17:59:57Z"
   },
   {
-    "additions": 59,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Allow it to do a subset of simpler fixes instead a full set.",
+    "additions": 10,
+    "author": "harshaljanjani",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "### What does this PR do? The following issue was identified and fixed in this PR: \u2192 Implemented a fix that applies the straight-through estimator to each latent chunk before `out_proj`, following the straight-through estimator pattern use\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43503",
-    "created_at": "2026-01-26T16:50:27Z",
-    "deletions": 33,
+    "comments_count": 17,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43820",
+    "created_at": "2026-02-07T14:44:51Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43503/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43503",
+    "files_url": "https://github.com/huggingface/transformers/pull/43820/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43820",
     "labels": [],
     "merged": true,
-    "number": 43503,
+    "number": 43820,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Enhance repo consistence bot",
-    "updated_at": "2026-01-26T17:13:34Z"
+    "title": "fix(models): Apply STE in Dac.from_latents to match the forward pass",
+    "updated_at": "2026-02-10T17:18:16Z"
   },
   {
-    "additions": 121,
-    "author": "mbtariq82",
-    "author_association": "FIRST_TIME_CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
-    "changed_files": 3,
+    "additions": 135,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This is a follow-up work on trying to fix flakiness. Adding Global Deterministic Testing # Deterministic Testing Infrastructure - Summary N.B. this is for CPU-only tests ## Problem The test suite has flaky tests that failed intermittently\u2026",
+    "changed_files": 13,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43501",
-    "created_at": "2026-01-26T16:18:02Z",
-    "deletions": 0,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43501/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43501",
+    "comments_count": 3,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43817",
+    "created_at": "2026-02-07T08:33:51Z",
+    "deletions": 57,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43817/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43817",
     "labels": [],
     "merged": false,
-    "number": 43501,
+    "number": 43817,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Added tokenizer encoder",
-    "updated_at": "2026-01-26T16:43:03Z"
+    "title": "fix(flaky): use a fixture for `set_seed` and single-threading [WIP]",
+    "updated_at": "2026-03-19T10:29:23Z"
   },
   {
-    "additions": 10,
-    "author": "ydshieh2",
+    "additions": 16,
+    "author": "thecaptain789",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "[don't merge] check bot permission",
+    "body_excerpt": "Fixes #43698 Adds support for run resumption in the SwanLab integration via two new environment variables: - `SWANLAB_RUN_ID`: Unique identifier for the run - `SWANLAB_RESUME`: Controls resumption behavior (`must`, `allow`, `never`, `auto`\u2026",
     "changed_files": 1,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 11,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43500",
-    "created_at": "2026-01-26T15:12:19Z",
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43816",
+    "created_at": "2026-02-07T03:55:02Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43500/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43500",
+    "files_url": "https://github.com/huggingface/transformers/pull/43816/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43816",
     "labels": [],
     "merged": false,
-    "number": 43500,
+    "number": 43816,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[don't merge] check bot permission",
-    "updated_at": "2026-01-28T15:11:49Z"
+    "state": "open",
+    "title": "fix: add id and resume parameters to SwanLab integration",
+    "updated_at": "2026-02-09T09:44:45Z"
   },
   {
-    "additions": 12,
-    "author": "Abdennacer-Badaoui",
-    "author_association": "MEMBER",
-    "body_excerpt": "Adds the missing `from_dict()` classmethod to the `GPURawMetrics` dataclass in `benchmark_v2/framework/hardware_metrics.py`. (`BenchmarkResult.from_dict()` is used at line 144 of `data_classes.py`)",
-    "changed_files": 2,
+    "additions": 1,
+    "author": "zyoraclub",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 0,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43499",
-    "created_at": "2026-01-26T14:43:46Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43815",
+    "created_at": "2026-02-07T03:24:08Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43499/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43499",
+    "files_url": "https://github.com/huggingface/transformers/pull/43815/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43815",
     "labels": [],
-    "merged": true,
-    "number": 43499,
-    "review_comments_count": 0,
+    "merged": false,
+    "number": 43815,
+    "review_comments_count": 1,
     "state": "closed",
-    "title": "Add missing GPURawMetrics.from_dict() method in benchmark_v2",
-    "updated_at": "2026-01-26T15:52:58Z"
+    "title": "Add missing import statement for os module",
+    "updated_at": "2026-02-09T06:28:30Z"
   },
   {
-    "additions": 11,
-    "author": "marcndo",
+    "additions": 0,
+    "author": "cyyever",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fix backward compatibility issue for tie_weights <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set,\u2026",
+    "body_excerpt": "# What does this PR do? This EncoderDecoderCache.batch_split is a remaining method from previous refactoring and is not used by other code.",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43498",
-    "created_at": "2026-01-26T14:17:56Z",
-    "deletions": 0,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43498/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43498",
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43814",
+    "created_at": "2026-02-07T03:04:26Z",
+    "deletions": 14,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43814/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43814",
     "labels": [],
-    "merged": false,
-    "number": 43498,
+    "merged": true,
+    "number": 43814,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "fix/backward compatibility for tie_weights",
-    "updated_at": "2026-01-26T16:29:08Z"
+    "state": "closed",
+    "title": "Delete batch_split from EncoderDecoderCache",
+    "updated_at": "2026-02-09T13:14:21Z"
   },
   {
-    "additions": 3,
-    "author": "eldarkurtic",
+    "additions": 5,
+    "author": "tobyliu2004",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Loading Llama-4 model with `Llama4ForConditionalGeneration` fails because `self.config.pad_token_id` doesn't exist. For Llama-4 models, `pad_token_id` is inside `text_config` not the general config.",
+    "body_excerpt": "# What does this PR do? Fixes #43784 Fixes `NameError: name 'nn' is not defined` when importing transformers with PyTorch < 2.4. ## The Issue When PyTorch < 2.4 is detected, transformers disables PyTorch by making `is_torch_available()` re\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43497",
-    "created_at": "2026-01-26T13:53:13Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43812",
+    "created_at": "2026-02-06T19:53:01Z",
     "deletions": 1,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43497/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43497",
+    "files_url": "https://github.com/huggingface/transformers/pull/43812/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43812",
     "labels": [],
     "merged": false,
-    "number": 43497,
+    "number": 43812,
     "review_comments_count": 0,
-    "state": "closed",
-    "title": "[Bugfix] Extract pad_token_id from text config for Llama-4",
-    "updated_at": "2026-02-10T09:13:12Z"
+    "state": "open",
+    "title": "Fix: Import torch.nn for type hints even when PyTorch is disabled",
+    "updated_at": "2026-02-07T09:35:29Z"
   },
   {
-    "additions": 89,
-    "author": "tomaarsen",
+    "additions": 6,
+    "author": "Rocketknight1",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? This PR fixes the following regression: ```python from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(\"sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2\") sentences = [\"C'est u\u2026",
+    "body_excerpt": "The test `tests/models/pe_audio_video/test_modeling_pe_audio_video.py::PeAudioVideoEncoderTest::test_model_forward_default_config_values` is flaky in the CI. In local testing, it failed in 5 out of 100 runs for me. After some digging, I fi\u2026",
     "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43496",
-    "created_at": "2026-01-26T13:24:45Z",
-    "deletions": 65,
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43811",
+    "created_at": "2026-02-06T19:46:15Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43496/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43496",
+    "files_url": "https://github.com/huggingface/transformers/pull/43811/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43811",
     "labels": [],
-    "merged": false,
-    "number": 43496,
-    "review_comments_count": 1,
+    "merged": true,
+    "number": 43811,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Revert tokenization regression, add test",
-    "updated_at": "2026-01-28T10:04:07Z"
+    "title": "Fix video interpolation in pe_audio_video",
+    "updated_at": "2026-02-09T12:51:52Z"
   },
   {
-    "additions": 157,
-    "author": "leoneperdigao",
+    "additions": 10,
+    "author": "michaelfeil",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary <!-- Add a brief summary of changes --> ## Related Issue Fixes #43408 **Issue:** Warning: You are using a model of type sam3_video to instantiate a model of type sam3_tracker **URL:** https://github.com/huggingface/transformers/\u2026",
-    "changed_files": 8,
+    "body_excerpt": "# What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflec\u2026",
+    "changed_files": 4,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 12,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43495",
-    "created_at": "2026-01-26T12:46:21Z",
-    "deletions": 7,
+    "comments_count": 13,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43810",
+    "created_at": "2026-02-06T18:24:13Z",
+    "deletions": 3,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43495/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43495",
+    "files_url": "https://github.com/huggingface/transformers/pull/43810/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43810",
     "labels": [],
-    "merged": true,
-    "number": 43495,
-    "review_comments_count": 4,
+    "merged": false,
+    "number": 43810,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "fix: add compatible_model_types to suppress model type mismatch warnings",
-    "updated_at": "2026-02-05T13:31:24Z"
+    "title": "add bidirectional attention to qwen and llama configs",
+    "updated_at": "2026-02-07T17:40:34Z"
   },
   {
-    "additions": 20,
-    "author": "githubnemo",
+    "additions": 90,
+    "author": "qgallouedec",
     "author_association": "MEMBER",
-    "body_excerpt": "The Qwen3 MoE config was missing the mapping attribute for the num_expert_local config variable which made it impossible to load FP8 quantized models, due to the following exception: ``` Traceback (most recent call last): File \".../exps/tr\u2026",
-    "changed_files": 3,
+    "body_excerpt": "Trainer is going to be heavily refactored over the coming weeks/months (see #43595). To avoid the recurring pattern: PR merges in `transformers` \u2192 TRL CI breaks \u2192 follow-up PR in `transformers` to fix something we could have caught earlier\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 4,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43494",
-    "created_at": "2026-01-26T11:34:05Z",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43809",
+    "created_at": "2026-02-06T17:05:13Z",
     "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43494/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43494",
+    "files_url": "https://github.com/huggingface/transformers/pull/43809/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43809",
     "labels": [],
     "merged": true,
-    "number": 43494,
-    "review_comments_count": 1,
+    "number": 43809,
+    "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix loading of Qwen3 FP8",
-    "updated_at": "2026-01-27T09:56:23Z"
+    "title": "Add TRL CI bot workflow to trigger tests on PR comments",
+    "updated_at": "2026-02-06T17:36:59Z"
   },
   {
-    "additions": 54,
-    "author": "eustlb",
+    "additions": 23,
+    "author": "stevhliu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do?",
-    "changed_files": 5,
+    "body_excerpt": "expands the base classes, inference, and training sections as these are quite important and will give readers a better idea of what's available",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43492",
-    "created_at": "2026-01-26T10:30:53Z",
-    "deletions": 1,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43492/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43492",
+    "comments_count": 1,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43808",
+    "created_at": "2026-02-06T16:50:11Z",
+    "deletions": 13,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43808/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43808",
     "labels": [],
-    "merged": false,
-    "number": 43492,
+    "merged": true,
+    "number": 43808,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Perception Encoder follow up PR",
-    "updated_at": "2026-01-26T12:55:35Z"
+    "state": "closed",
+    "title": "[docs] reveal more in toctree",
+    "updated_at": "2026-02-06T22:29:26Z"
   },
   {
-    "additions": 605,
-    "author": "tarekziade",
+    "additions": 319,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "DRAFT FOR DISCUSSION # What does this PR do? <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great ti\u2026",
-    "changed_files": 4,
+    "body_excerpt": "# What does this PR do? This PR simplifies Trainer `__init__`: - Quantization validation extracted - PEFT unwrapping deduplicated - Liger Kernel extracted \u2014 apply_liger_kernel - Label smoother simplified - Validations grouped \u2014 `_validate_\u2026",
+    "changed_files": 5,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43491",
-    "created_at": "2026-01-26T10:30:51Z",
-    "deletions": 18,
-    "draft": true,
-    "files_url": "https://github.com/huggingface/transformers/pull/43491/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43491",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43807",
+    "created_at": "2026-02-06T16:18:12Z",
+    "deletions": 250,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43807/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43807",
     "labels": [],
-    "merged": false,
-    "number": 43491,
-    "review_comments_count": 10,
+    "merged": true,
+    "number": 43807,
+    "review_comments_count": 12,
     "state": "closed",
-    "title": "Improved new model template",
-    "updated_at": "2026-02-24T11:44:40Z"
+    "title": "Refactor trainer init",
+    "updated_at": "2026-02-10T15:00:21Z"
   },
   {
-    "additions": 539,
-    "author": "tarekziade",
+    "additions": 915,
+    "author": "SunMarc",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? It makes sure `extras` can be installed on all supported Python versions. - cleaned up extras (removed natten, tweaked mistral-common etc,) - adds a supported Python version range (10->14) - dynamically update the m\u2026",
-    "changed_files": 7,
+    "body_excerpt": "# What does this PR do? This PR updates TrainingArguments in several places: - More tests - Simplify the post_init - update and reorder __init__ For the review, just check that the post_init is correctly modified",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
     "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43490",
-    "created_at": "2026-01-26T10:27:44Z",
-    "deletions": 50,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43806",
+    "created_at": "2026-02-06T16:16:45Z",
+    "deletions": 639,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43490/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43490",
+    "files_url": "https://github.com/huggingface/transformers/pull/43806/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43806",
     "labels": [],
     "merged": true,
-    "number": 43490,
-    "review_comments_count": 21,
-    "state": "closed",
-    "title": "Fix extras on all supported Python versions",
-    "updated_at": "2026-01-30T15:14:55Z"
-  },
-  {
-    "additions": 10,
-    "author": "ydshieh",
-    "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? check repo bot, don't merge",
-    "changed_files": 3,
-    "cluster_id": "cluster-43488-10",
-    "cluster_ids": [
-      "cluster-43488-10"
-    ],
-    "cluster_role": "member",
-    "comments_count": 6,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43488",
-    "created_at": "2026-01-26T10:13:21Z",
-    "deletions": 1,
-    "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43488/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43488",
-    "labels": [],
-    "merged": false,
-    "number": 43488,
-    "review_comments_count": 0,
-    "state": "open",
-    "title": "[don't merge] bad format to check repo bot",
-    "updated_at": "2026-01-26T17:19:16Z"
+    "number": 43806,
+    "review_comments_count": 7,
+    "state": "closed",
+    "title": "Update TrainingArguments",
+    "updated_at": "2026-02-24T12:32:04Z"
   },
   {
-    "additions": 6,
-    "author": "tomaarsen",
+    "additions": 31,
+    "author": "tarekziade",
     "author_association": "MEMBER",
-    "body_excerpt": "Resolves #43474 Resolves #43475 # What does this PR do? * Removes `fpn_position_embeddings`, should have been `fpn_position_encoding` all along. Affected 3 architectures, and was introduced in #42564 3 days ago. * Uses `...get_text_feature\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? The difference check was returning False 40% of the times and was reproducible locally tested with ``` pytest -svx tests/models/ernie4_5_moe/test_modeling_ernie4_5_moe.py -k test_load_balancing_loss --flake-finder `\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 5,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43487",
-    "created_at": "2026-01-26T10:08:59Z",
-    "deletions": 5,
+    "comments_count": 9,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43804",
+    "created_at": "2026-02-06T14:24:50Z",
+    "deletions": 11,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43487/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43487",
+    "files_url": "https://github.com/huggingface/transformers/pull/43804/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43804",
     "labels": [],
     "merged": true,
-    "number": 43487,
-    "review_comments_count": 0,
+    "number": 43804,
+    "review_comments_count": 2,
     "state": "closed",
-    "title": "[`fix`] Sam3Video: Avoid fpn_position_embedding; use pooler_output",
-    "updated_at": "2026-01-26T10:38:53Z"
+    "title": "fix(flaky): Different approach to make sure loss exists",
+    "updated_at": "2026-02-20T07:45:27Z"
   },
   {
-    "additions": 23,
-    "author": "zucchini-nlp",
+    "additions": 2,
+    "author": "vasqu",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? As per title, it was not working when input was a batched 5D array Fixes https://github.com/huggingface/transformers/issues/43450",
-    "changed_files": 2,
+    "body_excerpt": "As per title, I merged #43772 after #43400 which caused this mismatch - causes red CI",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43486",
-    "created_at": "2026-01-26T09:54:12Z",
-    "deletions": 1,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43803",
+    "created_at": "2026-02-06T14:14:58Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43486/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43486",
+    "files_url": "https://github.com/huggingface/transformers/pull/43803/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43803",
     "labels": [],
     "merged": true,
-    "number": 43486,
+    "number": 43803,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix `make_batched_video` with 5D arrays",
-    "updated_at": "2026-01-30T10:27:50Z"
+    "title": "[`Repo Consistency`] Fix rms norm",
+    "updated_at": "2026-02-06T14:59:20Z"
   },
   {
-    "additions": 1,
-    "author": "LysandreJik",
-    "author_association": "MEMBER",
-    "body_excerpt": null,
+    "additions": 7,
+    "author": "Sankalpkumarsingh1234",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR adds a short real-world use case section to help users understand practical applications of text summarization. <!-- Congratulations! You've made it this far! You're not quite done yet though. Once merged, y\u2026",
     "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43485",
-    "created_at": "2026-01-26T09:51:52Z",
-    "deletions": 12,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43802",
+    "created_at": "2026-02-06T13:41:03Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43485/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43485",
+    "files_url": "https://github.com/huggingface/transformers/pull/43802/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43802",
     "labels": [],
-    "merged": true,
-    "number": 43485,
+    "merged": false,
+    "number": 43802,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Revise MIGRATION_GUIDE_V5.md for version 5 updates",
-    "updated_at": "2026-01-26T10:01:11Z"
+    "title": "Add real-world use case section for text summarization",
+    "updated_at": "2026-02-09T12:14:45Z"
   },
   {
-    "additions": 70,
-    "author": "raimbekovm",
+    "additions": 8,
+    "author": "robell",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Optimizes timestamp rendering in Ernie 4.5 VL video processing by caching text overlays and using torch alpha blending instead of slow `torch->PIL->torch` conversion for each frame. **Performance improvement:** | Mo\u2026",
-    "changed_files": 1,
+    "body_excerpt": "When using the torch.export path with this model we hit an issue on correction_coefs being modified with unknown side-effects the dynamo can't catch. So it lifts this tensor unnecessarily, and fails assuming mutation is needed RuntimeError\u2026",
+    "changed_files": 2,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 1,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43484",
-    "created_at": "2026-01-26T09:40:50Z",
-    "deletions": 25,
+    "comments_count": 8,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43801",
+    "created_at": "2026-02-06T13:26:58Z",
+    "deletions": 4,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43484/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43484",
+    "files_url": "https://github.com/huggingface/transformers/pull/43801/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43801",
     "labels": [],
+    "merged": true,
+    "number": 43801,
+    "review_comments_count": 3,
+    "state": "closed",
+    "title": "Fix lifted_tensor in Gemma3n export which dynamo can't reason about",
+    "updated_at": "2026-02-12T09:58:33Z"
+  },
+  {
+    "additions": 13182,
+    "author": "harshang03",
+    "author_association": "NONE",
+    "body_excerpt": "# What does this PR do? This PR makes remove_unused_columns=True customizable for trainers that transform dataset columns before calling the model. It introduces an optional signature_columns argument on Trainer to control which dataset co\u2026",
+    "changed_files": 4,
+    "cluster_id": null,
+    "cluster_ids": [],
+    "cluster_role": null,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43800",
+    "created_at": "2026-02-06T13:07:32Z",
+    "deletions": 0,
+    "draft": true,
+    "files_url": "https://github.com/huggingface/transformers/pull/43800/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43800",
+    "labels": [
+      "Code agent slop"
+    ],
     "merged": false,
-    "number": 43484,
+    "number": 43800,
     "review_comments_count": 0,
-    "state": "open",
-    "title": "Optimize Ernie 4.5 VL timestamp rendering with cached overlays",
-    "updated_at": "2026-01-26T09:41:54Z"
+    "state": "closed",
+    "title": "Add new Trainer utilities and documentation",
+    "updated_at": "2026-02-06T13:42:08Z"
   },
   {
-    "additions": 36,
-    "author": "tarekziade",
+    "additions": 14,
+    "author": "kashif",
     "author_association": "MEMBER",
-    "body_excerpt": "# What does this PR do? Uses bucket=module to randomize tests within files, compatible with --dist=loadfile. CI uses CIRCLE_BUILD_NUM as seed for reproducibility across parallel containers. Local runs use random seed to catch order depende\u2026",
-    "changed_files": 5,
+    "body_excerpt": "# What does this PR do? With this change: https://github.com/huggingface/accelerate/pull/3916 there is no torch device mesh when sp_backend=\"deepspeed\", and transformers currently assumes it exists. That\u2019s why you get: ``` sp_group = self.\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 7,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43483",
-    "created_at": "2026-01-26T08:34:02Z",
-    "deletions": 27,
+    "comments_count": 2,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43799",
+    "created_at": "2026-02-06T13:03:20Z",
+    "deletions": 2,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43483/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43483",
+    "files_url": "https://github.com/huggingface/transformers/pull/43799/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43799",
     "labels": [],
     "merged": true,
-    "number": 43483,
-    "review_comments_count": 7,
+    "number": 43799,
+    "review_comments_count": 3,
     "state": "closed",
-    "title": "Add pytest-random-order for reproducible test randomization",
-    "updated_at": "2026-01-26T16:02:04Z"
+    "title": "[Trainer] use deepspeed SP process group when Accelerate doesn\u2019t build a mesh",
+    "updated_at": "2026-02-06T16:15:28Z"
   },
   {
-    "additions": 2,
-    "author": "raimbekovm",
+    "additions": 15,
+    "author": "harshaljanjani",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "# What does this PR do? Fixes a bug where `vision_eager_attention_forward` ignored the passed `scaling` parameter and used `module.head_dim**-0.5` instead. This causes incorrect attention scores under Tensor Parallelism (TP) where head dim\u2026",
-    "changed_files": 1,
+    "body_excerpt": "### What does this PR do? The following failing tests were identified and fixed in this PR: \u2192 **BLOOM:** `batch_encode_plus()` method was removed from `PreTrainedTokenizerBase` in commit 05c0e1d390 (the \"rm slow tokenizers\" refactor, #4093\u2026",
+    "changed_files": 3,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 9,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43481",
-    "created_at": "2026-01-26T05:55:08Z",
-    "deletions": 2,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43798",
+    "created_at": "2026-02-06T13:00:13Z",
+    "deletions": 6,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43481/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43481",
+    "files_url": "https://github.com/huggingface/transformers/pull/43798/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43798",
     "labels": [],
-    "merged": false,
-    "number": 43481,
+    "merged": true,
+    "number": 43798,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "Fix Llama4 vision attention scaling for TP compatibility",
-    "updated_at": "2026-01-27T09:52:18Z"
+    "title": "fix(testing): Fix BLOOM tokenizer, CLAP audio features, and CLVP text tester usage in tests",
+    "updated_at": "2026-02-06T13:57:32Z"
   },
   {
-    "additions": 4,
-    "author": "charlieJ107",
-    "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## \ud83d\udc1b Bug Fix: Phi4MultimodalConfig default sub-config initialization This PR fixes two issues in `Phi4MultimodalConfig.__init__` related to default initialization of multimodal sub-configs. Rations in Phi4MultimodalConfig # What does this\u2026",
-    "changed_files": 2,
+    "additions": 88,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "Let's see if we can fix that flaky",
+    "changed_files": 10,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 2,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43480",
-    "created_at": "2026-01-26T01:32:08Z",
-    "deletions": 4,
+    "comments_count": 4,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43797",
+    "created_at": "2026-02-06T12:14:02Z",
+    "deletions": 31,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43480/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43480",
+    "files_url": "https://github.com/huggingface/transformers/pull/43797/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43797",
     "labels": [],
-    "merged": true,
-    "number": 43480,
+    "merged": false,
+    "number": 43797,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "fix(Phi4Multimodal): Fix incorrect default vision/audio config initialization in Phi4MultimodalConfig",
-    "updated_at": "2026-01-26T14:02:23Z"
+    "title": "preventing I/O errors on closed streams in the `cli` helper",
+    "updated_at": "2026-02-06T15:03:26Z"
   },
   {
-    "additions": 139,
-    "author": "karthikthota-03",
-    "author_association": "FIRST_TIMER",
-    "body_excerpt": "**Related Issue** Fixes #43472 **Overview** This PR is to refactor the expert implementation in Qwen2Moe by introducing a standardized BatchLinear utility. The current implementation of Qwen2MoeExperts relies on a custom expert registry an\u2026",
-    "changed_files": 4,
+    "additions": 37,
+    "author": "hemanth678901-stack",
+    "author_association": "NONE",
+    "body_excerpt": "This PR clarifies a common misconception around GPU memory usage during training, particularly when using the Adafactor optimizer with DeepSpeed. Several users expect Adafactor to always reduce peak GPU memory due to reduced optimizer stat\u2026",
+    "changed_files": 1,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43478",
-    "created_at": "2026-01-25T16:26:24Z",
-    "deletions": 45,
+    "comments_count": 0,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43796",
+    "created_at": "2026-02-06T11:55:33Z",
+    "deletions": 0,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43478/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43478",
+    "files_url": "https://github.com/huggingface/transformers/pull/43796/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43796",
     "labels": [
       "Code agent slop"
     ],
     "merged": false,
-    "number": 43478,
+    "number": 43796,
     "review_comments_count": 0,
     "state": "closed",
-    "title": "refactor: use BatchLinear in Qwen2Moe to enable PEFT/LoRA support",
-    "updated_at": "2026-01-26T13:31:10Z"
+    "title": "Clarify GPU memory expectations when using Adafactor with DeepSpeed",
+    "updated_at": "2026-02-06T13:37:49Z"
   },
   {
-    "additions": 2,
-    "author": "raimbekovm",
+    "additions": 1464,
+    "author": "liu-jiaxuan",
     "author_association": "CONTRIBUTOR",
-    "body_excerpt": "## Summary Remove outdated TODO comments claiming patch embedding weight tying is \"not working\". ## Details Testing confirms the tying mechanism works correctly: - `patch_embed.proj.weight` and `encoder.embed_patches.proj.weight` share the\u2026",
-    "changed_files": 1,
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
+    "changed_files": 20,
+    "cluster_id": "cluster-43098-11",
+    "cluster_ids": [
+      "cluster-43098-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 6,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43795",
+    "created_at": "2026-02-06T11:22:56Z",
+    "deletions": 9,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43795/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43795",
+    "labels": [],
+    "merged": false,
+    "number": 43795,
+    "review_comments_count": 38,
+    "state": "closed",
+    "title": "[Model] Add PP-OCRv5_server_rec Model Support",
+    "updated_at": "2026-03-18T16:33:35Z"
+  },
+  {
+    "additions": 178,
+    "author": "tarekziade",
+    "author_association": "MEMBER",
+    "body_excerpt": "This patch aims to reduce flakiness in CI tests. We identified two causes of nondeterministic behavior: - Some tests were not using a fixed RNG seed, which reduced determinism. - The cli tests were occasionally triggering I/O errors due to\u2026",
+    "changed_files": 28,
     "cluster_id": null,
     "cluster_ids": [],
     "cluster_role": null,
-    "comments_count": 3,
-    "conversation_url": "https://github.com/huggingface/transformers/pull/43477",
-    "created_at": "2026-01-25T13:30:13Z",
-    "deletions": 2,
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43794",
+    "created_at": "2026-02-06T09:54:59Z",
+    "deletions": 114,
     "draft": false,
-    "files_url": "https://github.com/huggingface/transformers/pull/43477/files",
-    "html_url": "https://github.com/huggingface/transformers/pull/43477",
+    "files_url": "https://github.com/huggingface/transformers/pull/43794/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43794",
     "labels": [],
     "merged": true,
-    "number": 43477,
-    "review_comments_count": 0,
+    "number": 43794,
+    "review_comments_count": 1,
+    "state": "closed",
+    "title": "Enforce manual seed to reduce flakiness",
+    "updated_at": "2026-02-06T16:30:31Z"
+  },
+  {
+    "additions": 5225,
+    "author": "liu-jiaxuan",
+    "author_association": "CONTRIBUTOR",
+    "body_excerpt": "# What does this PR do? Fixes # (issue) ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Did you read the [contributor guideline](https://github.com/huggingfa\u2026",
+    "changed_files": 15,
+    "cluster_id": "cluster-43098-11",
+    "cluster_ids": [
+      "cluster-43098-11"
+    ],
+    "cluster_role": "member",
+    "comments_count": 5,
+    "conversation_url": "https://github.com/huggingface/transformers/pull/43793",
+    "created_at": "2026-02-06T09:34:48Z",
+    "deletions": 0,
+    "draft": false,
+    "files_url": "https://github.com/huggingface/transformers/pull/43793/files",
+    "html_url": "https://github.com/huggingface/transformers/pull/43793",
+    "labels": [],
+    "merged": false,
+    "number": 43793,
+    "review_comments_count": 4,
     "state": "closed",
-    "title": "Remove stale TODO comments in UDOP tied weights",
-    "updated_at": "2026-01-26T14:36:01Z"
+    "title": "[Model] Add PP-OCRV5_mobile_rec Model Support",
+    "updated_at": "2026-03-19T10:50:04Z"
   }
 ]