diff --git a/.bandit b/.bandit new file mode 100644 index 0000000000000000000000000000000000000000..2d81286aee751afae8630a21986eb04f36e7e6d7 --- /dev/null +++ b/.bandit @@ -0,0 +1,3 @@ +[bandit] +exclude = tests +skips = B101 diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..df80c53a12dbe60f3e3f60c486cedad044e81ecd --- /dev/null +++ b/.editorconfig @@ -0,0 +1,14 @@ +root = true + +[*] +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true + +[*.py] +indent_style = space +indent_size = 4 + +[**.yml] +indent_style = space +indent_size = 2 diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..fd69af775696d49c5397839d5bd6320fe6a73f4b --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 88 + +select = C,E,F,W,B,B950 +extend-ignore = E203, E501, W503 diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..931993151044a4b390c3753ab0c52b7218044aca 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,35 +1,44 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ckpt filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -*.safetensors filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tar filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text +data/*.jsonl filter=lfs diff=lfs merge=lfs -text +axol/bin/python filter=lfs diff=lfs merge=lfs -text +axol/bin/python3 filter=lfs diff=lfs merge=lfs -text +axol/bin/python3.10 filter=lfs diff=lfs merge=lfs -text +git-lfs-3.2.0/git-lfs filter=lfs diff=lfs merge=lfs -text +git-lfs-linux-amd64-v3.2.0.tar.gz filter=lfs diff=lfs merge=lfs -text +out/adapter_model.bin filter=lfs diff=lfs merge=lfs -text +out/adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/optimizer.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/scheduler.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-126/tokenizer.model filter=lfs diff=lfs 
merge=lfs -text +out/checkpoint-126/training_args.bin filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/optimizer.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/scheduler.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/tokenizer.model filter=lfs diff=lfs merge=lfs -text +out/checkpoint-189/training_args.bin filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/optimizer.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/scheduler.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/tokenizer.model filter=lfs diff=lfs merge=lfs -text +out/checkpoint-230/training_args.bin filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/adapter_model.safetensors filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/optimizer.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/scheduler.pt filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/tokenizer.model filter=lfs diff=lfs merge=lfs -text +out/checkpoint-345/training_args.bin filter=lfs diff=lfs merge=lfs -text +out/merged/git-lfs-3.2.0/git-lfs filter=lfs diff=lfs merge=lfs -text +out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz.1 filter=lfs diff=lfs merge=lfs -text +out/merged/pytorch_model-00001-of-00003.bin filter=lfs diff=lfs merge=lfs -text +out/merged/pytorch_model-00002-of-00003.bin filter=lfs diff=lfs merge=lfs -text +out/merged/pytorch_model-00003-of-00003.bin filter=lfs diff=lfs merge=lfs -text +out/merged/tokenizer.model filter=lfs diff=lfs merge=lfs -text +out/tokenizer.model filter=lfs diff=lfs merge=lfs -text +out/training_args.bin filter=lfs diff=lfs merge=lfs -text diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..865b95d2a747f258c211edf28a96e9e32fa5179d --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,129 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, religion, or sexual identity +and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the + overall community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or + advances of any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email + address, without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement on Discord +at https://discord.gg/QYF8QrtEUm + +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series +of actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or +permanent ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. 
+ +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within +the community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.0, available at +https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. + +Community Impact Guidelines were inspired by [Mozilla's code of conduct +enforcement ladder](https://github.com/mozilla/diversity). + +[homepage]: https://www.contributor-covenant.org + +For answers to common questions about this code of conduct, see the FAQ at +https://www.contributor-covenant.org/faq. Translations are available at +https://www.contributor-covenant.org/translations. diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000000000000000000000000000000000000..9eec23e1a3c484063f62e8b825af901dc5f03f8d --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,76 @@ +# Contributing to axolotl + +First of all, thank you for your interest in contributing to axolotl! We appreciate the time and effort you're willing to invest in making our project better. This document provides guidelines and information to make the contribution process as smooth as possible. + +## Table of Contents + +- [Code of Conduct](#code-of-conduct) +- [Getting Started](#getting-started) +- [How to Contribute](#how-to-contribute) + - [Reporting Bugs](#reporting-bugs) + - [Suggesting Enhancements](#suggesting-enhancements) + - [Submitting Pull Requests](#submitting-pull-requests) +- [Style Guidelines](#style-guidelines) + - [Code Style](#code-style) + - [Commit Messages](#commit-messages) +- [Additional Resources](#additional-resources) + +## Code of Conductcode + +All contributors are expected to adhere to our [Code of Conduct](CODE_OF_CONDUCT.md). Please read it before participating in the axolotl community. + +## Getting Started + +Bugs? Please check for open issue else create a new [Issue](https://github.com/OpenAccess-AI-Collective/axolotl/issues/new). + +PRs are **greatly welcome**! + +1. Fork the repository and clone it to your local machine. +2. Set up the development environment by following the instructions in the [README.md](https://github.com/OpenAccess-AI-Collective/axolotl/tree/main/README.md) file. +3. Explore the codebase, run tests, and verify that everything works as expected. + +Please run below to setup env +```bash +pip3 install -r requirements-dev.txt -r requirements-tests.txt +pre-commit install + +# test +pytest tests/ +``` + +## How to Contribute + +### Reporting Bugs + +If you encounter a bug or issue while using axolotl, please open a new issue on the [GitHub Issues](https://github.com/OpenAccess-AI-Collective/axolotl/issues) page. Provide a clear and concise description of the problem, steps to reproduce it, and any relevant error messages or logs. + +### Suggesting Enhancements + +We welcome ideas for improvements and new features. 
To suggest an enhancement, open a new issue on the [GitHub Issues](https://github.com/OpenAccess-AI-Collective/axolotl/issues) page. Describe the enhancement in detail, explain the use case, and outline the benefits it would bring to the project. + +### Submitting Pull Requests + +1. Create a new branch for your feature or bugfix. Use a descriptive name like `feature/your-feature-name` or `fix/your-bugfix-name`. +2. Make your changes, following the [Style Guidelines](#style-guidelines) below. +3. Test your changes and ensure that they don't introduce new issues or break existing functionality. +4. Commit your changes, following the [commit message guidelines](#commit-messages). +5. Push your branch to your fork on GitHub. +6. Open a new pull request against the `main` branch of the axolotl repository. Include a clear and concise description of your changes, referencing any related issues. + +## Style Guidelines + +### Code Style + +axolotl uses [{codestyle}]({URLofCodestyle}) as its code style guide. Please ensure that your code follows these guidelines. + +### Commit Messages + +Write clear and concise commit messages that briefly describe the changes made in each commit. Use the imperative mood and start with a capitalized verb, e.g., "Add new feature" or "Fix bug in function". + +## Additional Resources + +- [GitHub Help](https://help.github.com/) +- [GitHub Pull Request Documentation](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests) +- [{codestyle}]({URLofCodestyle}) + +Thank you once again for your interest in contributing to axolotl. We look forward to collaborating with you and creating an even better project together! diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000000000000000000000000000000000000..4f6ea8de73a35dfec18cb813a9c23e9f09f831b6 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,13 @@ +# These are supported funding model platforms + +github: [winglian, OpenAccess-AI-Collective] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] +patreon: # Replace with a single Patreon username +open_collective: # Replace with a single Open Collective username +ko_fi: axolotl_ai # Replace with a single Ko-fi username +tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel +community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +liberapay: # Replace with a single Liberapay username +issuehunt: # Replace with a single IssueHunt username +otechie: # Replace with a single Otechie username +lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry +custom: ['https://quickchart.io/qr?text=bitcoin%3Abc1qxlgwlqwfea5s2cxm42xqsfmwjct0rj8w8ea5np&size=480¢erImageUrl=https%3A%2F%2Fupload.wikimedia.org%2Fwikipedia%2Fcommons%2Fthumb%2F4%2F46%2FBitcoin.svg%2F64px-Bitcoin.svg.png'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.github/ISSUE_TEMPLATE/bug-report.yaml b/.github/ISSUE_TEMPLATE/bug-report.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ed703d4e84f4f5a388cdd6edb7f0bcdebdb8c50 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.yaml @@ -0,0 +1,113 @@ +name: Bug Report +description: File a bug report +labels: ["bug", "needs triage"] +body: + - type: markdown + attributes: + value: | + ## Before you start + Please **make sure you are on the latest version.** + If you encountered the issue after you installed, updated, or reloaded, 
**please try restarting before reporting the bug**. + + - type: checkboxes + id: no-duplicate-issues + attributes: + label: "Please check that this issue hasn't been reported before." + description: "The **Label filters** may help make your search more focussed." + options: + - label: "I searched previous [Bug Reports](https://github.com/OpenAccess-AI-Collective/axolotl/labels/bug) didn't find any similar reports." + required: true + + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: Tell us what **should** happen. + validations: + required: true + + - type: textarea + id: what-happened + attributes: + label: Current behaviour + description: | + Tell us what happens instead of the expected behavior. + Provide stacktrace and/or screenshots. + validations: + required: true + + - type: textarea + id: reproduce + attributes: + label: Steps to reproduce + description: | + Which exact steps can a developer take to reproduce the issue? + The more detail you provide, the easier it will be to narrow down and fix the bug. + Please paste in tasks and/or queries **as text, not screenshots**. + placeholder: | + Example of the level of detail needed to reproduce any bugs efficiently and reliably. + 1. Go to the '...' page. + 2. Click on the '...' button. + 3. Scroll down to '...'. + 4. Observe the error. + validations: + required: true + + - type: textarea + id: config + attributes: + label: Config yaml + description: | + Please attach the config yaml! + render: yaml + + - type: textarea + id: possible-solution + attributes: + label: Possible solution + description: | + Not obligatory, but please suggest a fix or reason for the bug, if you have an idea. + + + - type: checkboxes + id: operating-systems + attributes: + label: Which Operating Systems are you using? + description: You may select more than one. + options: + - label: Linux + - label: macOS + - label: Windows + + - type: input + id: Python-version + attributes: + label: Python Version + description: Which {Programming} version are you using? + placeholder: 3.10 / please change accordingly + validations: + required: true + + - type: input + id: axolotl-branch-commit + attributes: + label: axolotl branch-commit + description: On which branch/commit are you? + placeholder: main/4d6490b + validations: + required: true + + - type: checkboxes + id: acknowledgements + attributes: + label: 'Acknowledgements' + description: 'Please confirm the following:' + options: + - label: 'My issue title is concise, descriptive, and in title casing.' + required: true + - label: 'I have searched the existing issues to make sure this bug has not been reported yet.' + required: true + - label: 'I am using the latest version of axolotl.' + required: true + - label: 'I have provided enough information for the maintainers to reproduce and diagnose the issue.' 
+ required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..e0c5754fb9b395438ff2e7aa04ce8b5c7d93dc0d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +blank_issues_enabled: false +contact_links: + - name: Ask a question + url: https://github.com/OpenAccess-AI-Collective/axolotl/discussions/categories/q-a + about: Ask questions and discuss with other community members + - name: Discuss the Project in Discord + url: https://discord.gg/HhrNrHJPRb diff --git a/.github/ISSUE_TEMPLATE/docs.yml b/.github/ISSUE_TEMPLATE/docs.yml new file mode 100644 index 0000000000000000000000000000000000000000..2c27af1aa07c7c053413a5d78a3df0e9bc7e1f85 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/docs.yml @@ -0,0 +1,46 @@ +name: Documentation Improvement / Clarity +description: Make a suggestion to improve the project documentation. +labels: ['needs triage', 'docs'] +body: + - type: markdown + attributes: + value: '## :book: Documentation :book:' + - type: markdown + attributes: + value: | + * Ask questions in [Discord](https://discord.gg/HhrNrHJPRb). + * Before you file an issue read the [Contributing guide](./CONTRIBUTING.md). + * Check to make sure someone hasn't already opened a [similar issue](https://github.com/OpenAccess-AI-Collective/axolotl/issues). + - type: textarea + attributes: + label: What piece of documentation is affected? + description: Please link to the article you'd like to see updated. + validations: + required: true + - type: textarea + attributes: + label: What part(s) of the article would you like to see updated? + description: | + - Give as much detail as you can to help us understand the change you want to see. + - Why should the docs be changed? What use cases does it support? + - What is the expected outcome? + validations: + required: true + - type: textarea + attributes: + label: Additional Information + description: Add any other context or screenshots about the feature request here. + validations: + required: false + - type: checkboxes + id: acknowledgements + attributes: + label: 'Acknowledgements' + description: 'Please confirm the following:' + options: + - label: 'My issue title is concise, descriptive, and in title casing.' + required: true + - label: 'I have searched the existing issues to make sure this feature has not been requested yet.' + required: true + - label: 'I have provided enough information for the maintainers to understand and evaluate this request.' + required: true diff --git a/.github/ISSUE_TEMPLATE/feature-request.yaml b/.github/ISSUE_TEMPLATE/feature-request.yaml new file mode 100644 index 0000000000000000000000000000000000000000..39b6cb74e1c3b7876806dcbcd6c8a34df8e64e22 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature-request.yaml @@ -0,0 +1,63 @@ +name: Feature Request / Enhancement +description: Suggest a new feature or feature enhancement for the project +labels: ["enhancement", "needs triage"] +body: + - type: checkboxes + id: no-duplicate-issues + attributes: + label: "⚠️ Please check that this feature request hasn't been suggested before." + description: "There are two locations for previous feature requests. Please search in both. Thank you. The **Label filters** may help make your search more focussed." + options: + - label: "I searched previous [Ideas in Discussions](https://github.com/OpenAccess-AI-Collective/axolotl/discussions/categories/ideas) didn't find any similar feature requests." 
+ required: true + - label: "I searched previous [Issues](https://github.com/OpenAccess-AI-Collective/axolotl/labels/enhancement) didn't find any similar feature requests." + required: true + + - type: textarea + id: feature-description + validations: + required: true + attributes: + label: "πŸ”– Feature description" + description: "A clear and concise description of what the feature request is." + placeholder: "You should add ..." + + - type: textarea + id: solution + validations: + required: true + attributes: + label: "βœ”οΈ Solution" + description: "A clear and concise description of what you want to happen, and why." + placeholder: "In my use-case, ..." + + - type: textarea + id: alternatives + validations: + required: false + attributes: + label: "❓ Alternatives" + description: "A clear and concise description of any alternative solutions or features you've considered." + placeholder: "I have considered ..." + + - type: textarea + id: additional-context + validations: + required: false + attributes: + label: "πŸ“ Additional Context" + description: "Add any other context or screenshots about the feature request here." + placeholder: "..." + + - type: checkboxes + id: acknowledgements + attributes: + label: 'Acknowledgements' + description: 'Please confirm the following:' + options: + - label: 'My issue title is concise, descriptive, and in title casing.' + required: true + - label: 'I have searched the existing issues to make sure this feature has not been requested yet.' + required: true + - label: 'I have provided enough information for the maintainers to understand and evaluate this request.' + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000000000000000000000000000000000000..562806287a31e70f3106c80413db395a17685c6b --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,27 @@ + + +# Description + + + +## Motivation and Context + + + + +## How has this been tested? + + + + + +## Screenshots (if appropriate) + +## Types of changes + + + +## Social Handles (Optional) + + + diff --git a/.github/SECURITY.md b/.github/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..aceb0d1a2e26eb5ad73f56692501b7682cb847b3 --- /dev/null +++ b/.github/SECURITY.md @@ -0,0 +1,9 @@ +# Security Policy + +## Supported Versions + +Due to the nature of the fast development that is happening in this project, only the latest released version can be supported. + +## Reporting a Vulnerability + +If you find a vulnerability, please contact us on [Discord](https://discord.gg/xcu3ECkH9a) rather than creating a GitHub issue to allow us some time to fix it before it is a known vulnerability to others. diff --git a/.github/SUPPORT.md b/.github/SUPPORT.md new file mode 100644 index 0000000000000000000000000000000000000000..e625ec6d1067cab5cf1aa06db49c6562ff16f2d4 --- /dev/null +++ b/.github/SUPPORT.md @@ -0,0 +1,10 @@ +# Support + +If you need help with this project or have questions, please: + +1. Check the documentation. +2. Search the existing issues and pull requests. +3. Create a new issue if your question is not answered or your problem is not solved. +4. Have a look in the [Discord server](https://discord.gg/HhrNrHJPRb) + +Please note that this project is maintained by volunteers who have limited availability. We'll do our best to address your questions and concerns in a timely manner. 
diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml new file mode 100644 index 0000000000000000000000000000000000000000..348a573a1c58e8112dac16c6772de5fe90bb15bd --- /dev/null +++ b/.github/release-drafter.yml @@ -0,0 +1,31 @@ +name-template: 'v$RESOLVED_VERSION' +tag-template: 'v$RESOLVED_VERSION' +categories: + - title: 'πŸš€ Features' + labels: + - 'feature' + - 'enhancement' + - title: 'πŸ› Bug Fixes' + labels: + - 'fix' + - 'bugfix' + - 'bug' + - title: '🧰 Maintenance' + label: 'chore' +change-template: '- $TITLE @$AUTHOR (#$NUMBER)' +change-title-escapes: '\<*_&' # You can add # and @ to disable mentions, and add ` to disable code blocks. +version-resolver: + major: + labels: + - 'major' + minor: + labels: + - 'minor' + patch: + labels: + - 'patch' + default: patch +template: | + ## What’s Changed + + $CHANGES diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml new file mode 100644 index 0000000000000000000000000000000000000000..381cf21ac1323fb98cf044b1b19e7363d11d41c3 --- /dev/null +++ b/.github/workflows/base.yml @@ -0,0 +1,58 @@ +name: ci-cd-base + +on: + workflow_dispatch: + +jobs: + build-base: + if: github.repository_owner == 'OpenAccess-AI-Collective' + # this job needs to be run on self-hosted GPU runners... + runs-on: axolotl-gpu-runner + strategy: + fail-fast: false + matrix: + include: + - cuda: "118" + cuda_version: 11.8.0 + python_version: "3.10" + pytorch: 2.1.2 + torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX" + - cuda: "121" + cuda_version: 12.1.0 + python_version: "3.10" + pytorch: 2.1.2 + torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX" + - cuda: "121" + cuda_version: 12.1.0 + python_version: "3.11" + pytorch: 2.1.2 + torch_cuda_arch_list: "7.0 7.5 8.0 8.6 9.0+PTX" + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Docker metadata + id: metadata + uses: docker/metadata-action@v3 + with: + images: winglian/axolotl-base + - name: Login to Docker Hub + uses: docker/login-action@v2 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build + uses: docker/build-push-action@v4 + with: + context: . 
+ file: ./docker/Dockerfile-base + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + labels: ${{ steps.metadata.outputs.labels }} + build-args: | + CUDA_VERSION=${{ matrix.cuda_version }} + CUDA=${{ matrix.cuda }} + PYTHON_VERSION=${{ matrix.python_version }} + PYTORCH_VERSION=${{ matrix.pytorch }} + TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000000000000000000000000000000000000..16cf774d71ffd9b4b6f7bc1fdc4b5c4a98997de1 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,31 @@ +name: Publish Docs +on: + push: + branches: + - main + +permissions: + contents: write + pages: write + +jobs: + build-deploy: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v4 + - name: Set up Quarto + uses: quarto-dev/quarto-actions/setup@v2 + - name: Setup Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: install dependencies + run: | + python3 -m pip install jupyter + - name: Publish to GitHub Pages (and render) + uses: quarto-dev/quarto-actions/publish@v2 + with: + target: gh-pages + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000000000000000000000000000000..7b02804c22282eeb8e07ce7c4eb6c8e0da66b2a0 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,22 @@ +name: lint +on: + # check on PRs, and manual triggers + pull_request: + paths: + - '**.py' + - 'requirements.txt' + - '.github/workflows/*.yml' + - "*.md" + workflow_dispatch: + +jobs: + pre-commit: + name: pre-commit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: 'pip' # caching pip dependencies + - uses: pre-commit/action@v3.0.0 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..9a8d99275933930e050275241a8207765917543e --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,119 @@ +name: ci-cd + +on: + push: + branches: + - "main" + workflow_dispatch: + +jobs: + build-axolotl: + if: ${{ ! 
contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + strategy: + fail-fast: false + matrix: + include: + - cuda: 118 + cuda_version: 11.8.0 + python_version: "3.10" + pytorch: 2.1.2 + axolotl_extras: + axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118" + is_latest: true + - cuda: 121 + cuda_version: 12.1.0 + python_version: "3.10" + pytorch: 2.1.2 + axolotl_extras: + - cuda: 121 + cuda_version: 12.1.0 + python_version: "3.11" + pytorch: 2.1.2 + axolotl_extras: + runs-on: axolotl-gpu-runner + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Docker metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: winglian/axolotl + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + # guidance for testing before pushing: https://docs.docker.com/build/ci/github-actions/test-before-push/ + - name: Build and export to Docker + uses: docker/build-push-action@v5 + with: + context: . + build-args: | + BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }} + CUDA=${{ matrix.cuda }} + PYTORCH_VERSION=${{ matrix.pytorch }} + AXOLOTL_ARGS=${{ matrix.axolotl_args }} + file: ./docker/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: | + ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }} + labels: ${{ steps.metadata.outputs.labels }} + + build-axolotl-runpod: + needs: build-axolotl + if: ${{ ! contains(github.event.commits[0].message, '[skip docker]]') && github.repository_owner == 'OpenAccess-AI-Collective' }} + # this job needs to be run on self-hosted GPU runners... + strategy: + matrix: + include: + - cuda: 118 + cuda_version: 11.8.0 + python_version: "3.10" + pytorch: 2.1.2 + axolotl_extras: + is_latest: true + - cuda: 121 + cuda_version: 12.1.0 + python_version: "3.10" + pytorch: 2.1.2 + axolotl_extras: + - cuda: 121 + cuda_version: 12.1.0 + python_version: "3.11" + pytorch: 2.1.2 + axolotl_extras: + runs-on: axolotl-gpu-runner + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Docker metadata + id: metadata + uses: docker/metadata-action@v5 + with: + images: winglian/axolotl-cloud + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Build + uses: docker/build-push-action@v5 + with: + context: . 
+ build-args: | + BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + CUDA=${{ matrix.cuda }} + file: ./docker/Dockerfile-cloud + push: ${{ github.event_name != 'pull_request' }} + tags: | + ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }} + ${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }} + labels: ${{ steps.metadata.outputs.labels }} diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml new file mode 100644 index 0000000000000000000000000000000000000000..dbd225f6f036eec6834e1ee40305c529893592d6 --- /dev/null +++ b/.github/workflows/pypi.yml @@ -0,0 +1,45 @@ +name: publish pypi + +on: + push: + tags: + - '*' + +jobs: + pypi-publish: + name: Upload release to PyPI + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/axolotl + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Check out repository code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install dependencies + run: | + pip3 install wheel + pip3 install -e . + pip3 install -r requirements-tests.txt + + - name: Extract tag name + id: tag + run: echo ::set-output name=TAG_NAME::$(echo $GITHUB_REF | cut -d / -f 3) + + - name: Update version in setup.py + run: | + sed -i -E 's/version="([0-9.]+)",/version="${{ steps.tag.outputs.TAG_NAME }}",/g' setup.py + + - name: Build a binary wheel + run: | + python setup.py sdist bdist_wheel + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000000000000000000000000000000000000..d8ca5400a96258eee51c6116a0adc182de534c26 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,100 @@ +name: Tests +on: + # check on push/merge to main, PRs, and manual triggers + push: + branches: + - "main" + paths: + - '**.py' + - 'requirements.txt' + - '.github/workflows/*.yml' + pull_request: + paths: + - '**.py' + - 'requirements.txt' + - '.github/workflows/*.yml' + workflow_dispatch: + +jobs: + pre-commit: + name: pre-commit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: 'pip' # caching pip dependencies + - uses: pre-commit/action@v3.0.0 + + pytest: + name: PyTest + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python_version: ["3.10", "3.11"] + timeout-minutes: 20 + + steps: + - name: Check out repository code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python_version }} + cache: 'pip' # caching pip dependencies + + - name: Install dependencies + run: | + pip3 install -U -e . 
+ pip3 install -r requirements-tests.txt + + - name: Run tests + run: | + pytest --ignore=tests/e2e/ tests/ + + docker-e2e-tests: + if: github.repository_owner == 'OpenAccess-AI-Collective' + # this job needs to be run on self-hosted GPU runners... + runs-on: [self-hosted, modal] + timeout-minutes: 60 + needs: [pre-commit, pytest] + + strategy: + fail-fast: false + matrix: + include: + - cuda: 118 + cuda_version: 11.8.0 + python_version: "3.10" + pytorch: 2.1.2 + axolotl_args: "--extra-index-url https://download.pytorch.org/whl/cu118" + num_gpus: 1 + - cuda: 121 + cuda_version: 12.1.0 + python_version: "3.10" + pytorch: 2.1.2 + num_gpus: 1 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Install Modal + run: | + python -m pip install --upgrade pip + pip install modal jinja2 + - name: Update env vars + run: | + echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV + echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV + echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV + echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV + - name: Run tests job on Modal + run: | + modal run cicd.tests diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..589440abf6863580b610c5fd9d25d33a7c4951b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,177 @@ +**/axolotl.egg-info +configs +last_run_prepared/ +.vscode +_site/ + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +# WandB +# wandb creates a folder to store logs for training runs +wandb + +# Runs +lora-out/* +qlora-out/* +mlruns/* + +/.quarto/ diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000000000000000000000000000000000000..79067a7c91c9364b6a26abeaa8420725f25050c4 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,3 @@ +[settings] +profile=black +known_third_party=wandb diff --git a/.mypy.ini b/.mypy.ini new file mode 100644 index 0000000000000000000000000000000000000000..ede9fef8879415168f4207eea47aed4d668d1f44 --- /dev/null +++ b/.mypy.ini @@ -0,0 +1,51 @@ +[mypy] +plugins = pydantic.mypy +exclude = venv + +[mypy-alpaca_lora_4bit.*] +ignore_missing_imports = True + +[mypy-axolotl.monkeypatch.*] +ignore_errors = True + +[mypy-axolotl.models.mixtral.*] +ignore_errors = True + +[mypy-axolotl.models.phi.*] +ignore_errors = True + +[mypy-flash_attn.*] +ignore_missing_imports = True + +[mypy-huggingface_hub] +ignore_missing_imports = True + +[mypy-transformers.*] +ignore_missing_imports = True + +[mypy-peft] +ignore_missing_imports = True + +[mypy-wandb] +ignore_missing_imports = True + +[mypy-bitsandbytes] +ignore_missing_imports = True + +[mypy-requests] +ignore_missing_imports = True + +[mypy-datasets] +ignore_missing_imports = True + +[mypy-fire] +ignore_missing_imports = True + +[mypy-setuptools] +ignore_missing_imports = True + +[mypy-addict] +ignore_missing_imports = True + +[mypy-xformers.*] +ignore_missing_imports = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c5f205897ed234667a47afe87f74b364cf119a3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +default_language_version: + python: python3 + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black +- repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort +- repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 +- repo: https://github.com/PyCQA/pylint + rev: v2.17.4 + hooks: + - id: pylint +- 
repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.3.0 + hooks: + - id: mypy + additional_dependencies: + [ + 'types-PyYAML', + 'pydantic>=2.5.3', + ] +- repo: https://github.com/PyCQA/bandit + rev: 1.7.5 + hooks: + - id: bandit + args: [ + '--ini', + '.bandit', + ] diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000000000000000000000000000000000000..ed973d2859b1faa54ec53706a2e4ac15b70556a4 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,14 @@ +[MASTER] +init-hook="from pylint.config import find_pylintrc; import os, sys; sys.path.append(os.path.dirname(find_pylintrc()))" + +[TYPECHECK] + +# List of members which are set dynamically and missed by Pylint inference +# system, and so shouldn't trigger E1101 when accessed. +generated-members=numpy.*, torch.* + + +[pylint.messages_control] +disable=missing-function-docstring, line-too-long, import-error, + too-many-arguments, too-many-locals, too-many-statements, too-many-branches, too-few-public-methods, + too-many-instance-attributes, fixme, import-outside-toplevel, logging-fstring-interpolation, diff --git a/FAQS.md b/FAQS.md new file mode 100644 index 0000000000000000000000000000000000000000..f3c9dd52536aa7e9579f8058961b437e80cc8162 --- /dev/null +++ b/FAQS.md @@ -0,0 +1,7 @@ +# FAQs + +- Can you train StableLM with this? Yes, but only with a single GPU atm. Multi GPU support is coming soon! Just waiting on this [PR](https://github.com/huggingface/transformers/pull/22874) +- Will this work with Deepspeed? That's still a WIP, but setting `export ACCELERATE_USE_DEEPSPEED=true` should work in some cases +- `Error invalid argument at line 359 in file /workspace/bitsandbytes/csrc/pythonInterface.c` +`/arrow/cpp/src/arrow/filesystem/s3fs.cc:2598: arrow::fs::FinalizeS3 was not called even though S3 was initialized.` +This could lead to a segmentation fault at exit. Try reinstalling bitsandbytes and transformers from source. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..d645695673349e3947e8e5ae42332d0ac3164cd7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..027c476c3be69b8dda3d799876702fad1f82c6a3 --- /dev/null +++ b/README.md @@ -0,0 +1,1356 @@ +# Axolotl + +Axolotl is a tool designed to streamline the fine-tuning of various AI models, offering support for multiple configurations and architectures. + +Features: +- Train various Huggingface models such as llama, pythia, falcon, mpt +- Supports fullfinetune, lora, qlora, relora, and gptq +- Customize configurations using a simple yaml file or CLI overwrite +- Load different dataset formats, use custom formats, or bring your own tokenized datasets +- Integrated with xformer, flash attention, rope scaling, and multipacking +- Works with single GPU or multiple GPUs via FSDP or Deepspeed +- Easily run with Docker locally or on the cloud +- Log results and optionally checkpoints to wandb or mlflow +- And more! + + + phorm.ai + + + + + + + +
+ +## Table of Contents +- [Introduction](#axolotl) +- [Supported Features](#axolotl-supports) +- [Quickstart](#quickstart-) +- [Environment](#environment) + - [Docker](#docker) + - [Conda/Pip venv](#condapip-venv) + - [Cloud GPU](#cloud-gpu) - Latitude.sh, JarvisLabs, RunPod + - [Bare Metal Cloud GPU](#bare-metal-cloud-gpu) + - [Windows](#windows) + - [Mac](#mac) + - [Google Colab](#google-colab) + - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot) +- [Dataset](#dataset) + - [How to Add Custom Prompts](#how-to-add-custom-prompts) + - [How to Use Custom Pretokenized Dataset](#how-to-use-your-custom-pretokenized-dataset) +- [Config](#config) + - [Train](#train) + - [Inference](#inference-playground) + - [Merge LORA to Base](#merge-lora-to-base) + - [Special Tokens](#special-tokens) +- Advanced Topics + - [Multipack](./docs/multipack.qmd) + - [RLHF & DPO](./docs/rlhf.qmd) +- [Common Errors](#common-errors-) + - [Tokenization Mismatch b/w Training & Inference](#tokenization-mismatch-bw-inference--training) +- [Debugging Axolotl](#debugging-axolotl) +- [Need Help?](#need-help-) +- [Badge](#badge-) +- [Community Showcase](#community-showcase) +- [Contributing](#contributing-) +- [Sponsors](#sponsors-) + + + +
+ axolotl +
+

+ Axolotl provides a unified repository for fine-tuning
a variety of AI models with ease
+

+

+        Go ahead and ask Axolotl questions!!
+

+ pre-commit + PyTest Status +
+
+ +
+ +## Axolotl supports + +| | fp16/fp32 | lora | qlora | gptq | gptq w/flash attn | flash attn | xformers attn | +|-------------|:----------|:-----|-------|------|-------------------|------------|--------------| +| llama | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | +| Mistral | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | βœ… | +| Mixtral-MoE | βœ… | βœ… | βœ… | ❓ | ❓ | ❓ | ❓ | +| Pythia | βœ… | βœ… | βœ… | ❌ | ❌ | ❌ | ❓ | +| cerebras | βœ… | βœ… | βœ… | ❌ | ❌ | ❌ | ❓ | +| btlm | βœ… | βœ… | βœ… | ❌ | ❌ | ❌ | ❓ | +| mpt | βœ… | ❌ | ❓ | ❌ | ❌ | ❌ | ❓ | +| falcon | βœ… | βœ… | βœ… | ❌ | ❌ | ❌ | ❓ | +| gpt-j | βœ… | βœ… | βœ… | ❌ | ❌ | ❓ | ❓ | +| XGen | βœ… | ❓ | βœ… | ❓ | ❓ | ❓ | βœ… | +| phi | βœ… | βœ… | βœ… | ❓ | ❓ | ❓ | ❓ | +| RWKV | βœ… | ❓ | ❓ | ❓ | ❓ | ❓ | ❓ | +| Qwen | βœ… | βœ… | βœ… | ❓ | ❓ | ❓ | ❓ | +| Gemma | βœ… | βœ… | βœ… | ❓ | ❓ | βœ… | ❓ | + +βœ…: supported +❌: not supported +❓: untested + +## Quickstart ⚑ + +Get started with Axolotl in just a few steps! This quickstart guide will walk you through setting up and running a basic fine-tuning task. + +**Requirements**: Python >=3.10 and Pytorch >=2.1.1. + +```bash +git clone https://github.com/OpenAccess-AI-Collective/axolotl +cd axolotl + +pip3 install packaging +pip3 install -e '.[flash-attn,deepspeed]' +``` + +### Usage +```bash +# preprocess datasets - optional but recommended +CUDA_VISIBLE_DEVICES="" python -m axolotl.cli.preprocess examples/openllama-3b/lora.yml + +# finetune lora +accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml + +# inference +accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \ + --lora_model_dir="./lora-out" + +# gradio +accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \ + --lora_model_dir="./lora-out" --gradio + +# remote yaml files - the yaml config can be hosted on a public URL +# Note: the yaml config must directly link to the **raw** yaml +accelerate launch -m axolotl.cli.train https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/examples/openllama-3b/lora.yml +``` + +## Advanced Setup + +### Environment + +#### Docker + + ```bash + docker run --gpus '"all"' --rm -it winglian/axolotl:main-latest + ``` + + Or run on the current files for development: + + ```sh + docker compose up -d + ``` + +>[!Tip] +> If you want to debug axolotl or prefer to use Docker as your development environment, see the [debugging guide's section on Docker](docs/debugging.qmd#debugging-with-docker). + +
+ + Docker advanced + + A more powerful Docker command to run would be this: + + ```bash +docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface winglian/axolotl:main-latest + ``` + + It additionally: + * Prevents memory issues when running e.g. deepspeed (e.g. you could hit SIGBUS/signal 7 error) through `--ipc` and `--ulimit` args. + * Persists the downloaded HF data (models etc.) and your modifications to axolotl code through `--mount`/`-v` args. + * The `--name` argument simply makes it easier to refer to the container in vscode (`Dev Containers: Attach to Running Container...`) or in your terminal. + * The `--privileged` flag gives all capabilities to the container. + * The `--shm-size 10g` argument increases the shared memory size. Use this if you see `exitcode: -7` errors using deepspeed. + + [More information on nvidia website](https://docs.nvidia.com/deeplearning/frameworks/user-guide/index.html#setincshmem) + +
+ +#### Conda/Pip venv + 1. Install python >=**3.10** + + 2. Install pytorch stable https://pytorch.org/get-started/locally/ + + 3. Install Axolotl along with python dependencies + ```bash + pip3 install packaging + pip3 install -e '.[flash-attn,deepspeed]' + ``` + 4. (Optional) Login to Huggingface to use gated models/datasets. + ```bash + huggingface-cli login + ``` + Get the token at huggingface.co/settings/tokens + +#### Cloud GPU + +For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags) + +- on Latitude.sh use this [direct link](https://latitude.sh/blueprint/989e0e79-3bf6-41ea-a46b-1f246e309d5c) +- on JarvisLabs.ai use this [direct link](https://jarvislabs.ai/templates/axolotl) +- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz) + +#### Bare Metal Cloud GPU + +##### LambdaLabs + +
+ + Click to Expand + + 1. Install python + ```bash + sudo apt update + sudo apt install -y python3.10 + + sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 + sudo update-alternatives --config python # pick 3.10 if given option + python -V # should be 3.10 + + ``` + + 2. Install pip + ```bash + wget https://bootstrap.pypa.io/get-pip.py + python get-pip.py + ``` + + 3. Install torch + ```bash + pip3 install -U torch --index-url https://download.pytorch.org/whl/cu118 + ``` + + 4. Axolotl + ```bash + git clone https://github.com/OpenAccess-AI-Collective/axolotl + cd axolotl + + pip3 install packaging + pip3 install -e '.[flash-attn,deepspeed]' + pip3 install protobuf==3.20.3 + pip3 install -U --ignore-installed requests Pillow psutil scipy + ``` + + 5. Set path + ```bash + export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH + ``` +
+ +##### GCP + +
+
+Click to Expand
+
+Use a Deep Learning Linux OS image with CUDA and PyTorch preinstalled, then follow the Quickstart instructions.
+
+Make sure to run the command below to uninstall xla.
+```bash
+pip uninstall -y torch_xla[tpu]
+```
+
+
+ +#### Windows +Please use WSL or Docker! + +#### Mac + +Use the below instead of the install method in QuickStart. +``` +pip3 install -e '.' +``` +More info: [mac.md](/docs/mac.qmd) + +#### Google Colab + +Please use this example [notebook](examples/colab-notebooks/colab-axolotl-example.ipynb). + +#### Launching on public clouds via SkyPilot +To launch on GPU instances (both on-demand and spot instances) on 7+ clouds (GCP, AWS, Azure, OCI, and more), you can use [SkyPilot](https://skypilot.readthedocs.io/en/latest/index.html): + +```bash +pip install "skypilot-nightly[gcp,aws,azure,oci,lambda,kubernetes,ibm,scp]" # choose your clouds +sky check +``` + +Get the [example YAMLs](https://github.com/skypilot-org/skypilot/tree/master/llm/axolotl) of using Axolotl to finetune `mistralai/Mistral-7B-v0.1`: +``` +git clone https://github.com/skypilot-org/skypilot.git +cd skypilot/llm/axolotl +``` + +Use one command to launch: +```bash +# On-demand +HF_TOKEN=xx sky launch axolotl.yaml --env HF_TOKEN + +# Managed spot (auto-recovery on preemption) +HF_TOKEN=xx BUCKET= sky spot launch axolotl-spot.yaml --env HF_TOKEN --env BUCKET +``` + +### Dataset + +Axolotl supports a variety of dataset formats. Below are some of the formats you can use. +Have dataset(s) in one of the following format (JSONL recommended): + +#### Pretraining + +- `completion`: raw corpus + ```json + {"text": "..."} + ``` + +Note: Axolotl usually loads the entire dataset into memory. This will be challenging for large datasets. Use the following config to enable streaming: + +```yaml +pretraining_dataset: # hf path only +``` + +#### Supervised finetuning + +##### Instruction + +- `alpaca`: instruction; input(optional) + ```json + {"instruction": "...", "input": "...", "output": "..."} + ``` + +
+ +See other formats + +- `jeopardy`: question and answer + ```json + {"question": "...", "category": "...", "answer": "..."} + ``` +- `oasst`: instruction + ```json + {"INSTRUCTION": "...", "RESPONSE": "..."} + ``` +- `gpteacher`: instruction; input(optional) + ```json + {"instruction": "...", "input": "...", "response": "..."} + ``` +- `reflection`: instruction with reflect; input(optional) + ```json + {"instruction": "...", "input": "...", "output": "...", "reflection": "...", "corrected": "..."} + ``` +- `explainchoice`: question, choices, (solution OR explanation) + ```json + {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."} + ``` +- `concisechoice`: question, choices, (solution OR explanation) + ```json + {"question": "...", "choices": ["..."], "solution": "...", "explanation": "..."} + ``` +- `summarizetldr`: article and summary + ```json + {"article": "...", "summary": "..."} + ``` +- `alpaca_chat`: basic instruct for alpaca chat + ```json + {"instruction": "...", "input": "...", "response": "..."} + ``` +- `alpaca_chat.load_qa`: question and answer for alpaca chat + ```json + {"question": "...", "answer": "..."} + ``` +- `alpaca_chat.load_concise`: question and answer for alpaca chat, for concise answers + ```json + {"instruction": "...", "input": "...", "response": "..."} + ``` +- `alpaca_chat.load_camel_ai`: question and answer for alpaca chat, for load_camel_ai + ```json + {"message_1": "...", "message_2": "..."} + ``` +- `alpaca_w_system.load_open_orca`: support for open orca datasets with included system prompts, instruct + ```json + {"system_prompt": "...", "question": "...", "response": "..."} + ``` +- `context_qa`: in context question answering from an article + ```json + {"article": "...", "question": "...", "answer": "..."} + ``` +- `context_qa.load_v2`: in context question answering (alternate) + ```json + {"context": "...", "question": "...", "answer": "..."} + ``` +- `context_qa.load_404`: in context question answering from an article, with default response for no answer from context + ```json + {"article": "...", "unanswerable_question": "..."} + ``` +- `creative_acr.load_answer`: instruction and revision + ```json + {"instruction": "...", "revision": "..."} + ``` +- `creative_acr.load_critique`: critique + ```json + {"scores": "...", "critiques": "...", "instruction": "...", "answer": "..."} + ``` +- `creative_acr.load_revise`: critique and revise + ```json + {"scores": "...", "critiques": "...", "instruction": "...", "answer": "...", "revision": "..."} + ``` +- `metharme`: instruction, adds additional eos tokens + ```json + {"prompt": "...", "generation": "..."} + ``` + +
+ +##### Template-Free + +- `input_output`: template-free prompt construction + ```json + {"segments": [{"label": true|false, "text": "..."}]} + ``` + +This is a special format that allows you to construct prompts without using templates. This is for advanced users who want more freedom with prompt construction. See [these docs](docs/input_output.qmd) for more details. + +##### Conversation + +- `sharegpt`: conversations where `from` is `human`/`gpt`. (optional: first row with role `system` to override default system prompt) + ```json + {"conversations": [{"from": "...", "value": "..."}]} + ``` + +
+ +See other formats + +- `pygmalion`: pygmalion + ```json + {"conversations": [{"role": "...", "value": "..."}]} + ``` +- `sharegpt.load_role`: conversations where `role` is used instead of `from` + ```json + {"conversations": [{"role": "...", "value": "..."}]} + ``` +- `sharegpt.load_guanaco`: conversations where `from` is `prompter`/`assistant` instead of default sharegpt + ```json + {"conversations": [{"from": "...", "value": "..."}]} + ``` +- `sharegpt_jokes`: creates a chat where bot is asked to tell a joke, then explain why the joke is funny + ```json + {"conversations": [{"title": "...", "text": "...", "explanation": "..."}]} + ``` + +
+ +Note: `type: sharegpt` opens a special config `conversation:` that enables conversions to many Conversation types. See dataset section under [all yaml options](#all-yaml-options). + +#### How to add custom prompts + +For a dataset that is preprocessed for instruction purposes: + +```json +{"input": "...", "output": "..."} +``` + +You can use this example in your YAML config: + +```yaml +datasets: + - path: repo + type: + system_prompt: "" + field_system: system + field_instruction: input + field_output: output + format: "[INST] {instruction} [/INST]" + no_input_format: "[INST] {instruction} [/INST]" +``` +See full config options under [all yaml options](#all-yaml-options). + +#### How to use your custom pretokenized dataset + +- Do not pass a `type:` +- Columns in Dataset must be exactly `input_ids`, `attention_mask`, `labels` + +```yaml +- path: ... +``` + +### Config + +See [examples](examples) for quick start. It is recommended to duplicate and modify to your needs. The most important options are: + +- model + ```yaml + base_model: ./llama-7b-hf # local or huggingface repo + ``` + Note: The code will load the right architecture. + +- dataset + ```yaml + datasets: + # huggingface repo + - path: vicgalle/alpaca-gpt4 + type: alpaca + + # huggingface repo with specific configuration/subset + - path: EleutherAI/pile + name: enron_emails + type: completion # format from earlier + field: text # Optional[str] default: text, field to use for completion data + + # huggingface repo with multiple named configurations/subsets + - path: bigcode/commitpackft + name: + - ruby + - python + - typescript + type: ... # unimplemented custom format + + # fastchat conversation + # See 'conversation' options: https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py + - path: ... + type: sharegpt + conversation: chatml # default: vicuna_v1.1 + + # local + - path: data.jsonl # or json + ds_type: json # see other options below + type: alpaca + + # dataset with splits, but no train split + - path: knowrohit07/know_sql + type: context_qa.load_v2 + train_on_split: validation + + # loading from s3 or gcs + # s3 creds will be loaded from the system default and gcs only supports public access + - path: s3://path_to_ds # Accepts folder with arrow/parquet or file path like above. Supports s3, gcs. + ... + + # Loading Data From a Public URL + # - The file format is `json` (which includes `jsonl`) by default. For different formats, adjust the `ds_type` option accordingly. + - path: https://some.url.com/yourdata.jsonl # The URL should be a direct link to the file you wish to load. URLs must use HTTPS protocol, not HTTP. + ds_type: json # this is the default, see other options below. + ``` + +- loading + ```yaml + load_in_4bit: true + load_in_8bit: true + + bf16: auto # require >=ampere, auto will detect if your GPU supports this and choose automatically. + fp16: # leave empty to use fp16 when bf16 is 'auto'. set to false if you want to fallback to fp32 + tf32: true # require >=ampere + + bfloat16: true # require >=ampere, use instead of bf16 when you don't want AMP (automatic mixed precision) + float16: true # use instead of fp16 when you don't want AMP + ``` + Note: Repo does not do 4-bit quantization. + +- lora + ```yaml + adapter: lora # 'qlora' or leave blank for full finetune + lora_r: 8 + lora_alpha: 16 + lora_dropout: 0.05 + lora_target_modules: + - q_proj + - v_proj + ``` + +
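+Putting the pieces above together, a minimal LoRA run might look like the sketch below. This is only an illustration assembled from options documented in this README; the base model, dataset, and hyperparameter values are placeholders rather than tuned recommendations, so prefer the tested configs under [examples](examples) as a starting point.
+
+```yaml
+# minimal illustrative LoRA fine-tune; values are placeholders, not recommendations
+base_model: ./llama-7b-hf
+load_in_8bit: true
+
+datasets:
+  - path: vicgalle/alpaca-gpt4
+    type: alpaca
+dataset_prepared_path: data/last_run_prepared
+val_set_size: 0.04
+
+adapter: lora
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - v_proj
+
+sequence_len: 2048
+micro_batch_size: 2
+gradient_accumulation_steps: 1
+num_epochs: 4
+learning_rate: 0.0002
+optimizer: adamw_bnb_8bit
+
+bf16: auto
+gradient_checkpointing: true
+output_dir: ./lora-out
+```
+
+You would then launch training on it with `accelerate launch -m axolotl.cli.train your_config.yml`, exactly as in the Quickstart.
+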
+ +All yaml options (click to expand) + +```yaml +# This is the huggingface model that contains *.pt, *.safetensors, or *.bin files +# This can also be a relative path to a model on disk +base_model: ./llama-7b-hf +# You can specify an ignore pattern if the model repo contains more than 1 model type (*.pt, etc) +base_model_ignore_patterns: +# If the base_model repo on hf hub doesn't include configuration .json files, +# You can set that here, or leave this empty to default to base_model +base_model_config: ./llama-7b-hf +# You can specify to choose a specific model revision from huggingface hub +revision_of_model: +# Optional tokenizer configuration path in case you want to use a different tokenizer +# than the one defined in the base model +tokenizer_config: +# If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too +model_type: AutoModelForCausalLM +# Corresponding tokenizer for the model AutoTokenizer is a good choice +tokenizer_type: AutoTokenizer +# Trust remote code for untrusted source +trust_remote_code: +# use_fast option for tokenizer loading from_pretrained, default to True +tokenizer_use_fast: +# Whether to use the legacy tokenizer setting, defaults to True +tokenizer_legacy: +# Resize the model embeddings when new tokens are added to multiples of 32 +# This is reported to improve training speed on some models +resize_token_embeddings_to_32x: + +# (Internal use only) +# Used to identify which the model is based on +is_falcon_derived_model: +is_llama_derived_model: +is_qwen_derived_model: +# Please note that if you set this to true, `padding_side` will be set to "left" by default +is_mistral_derived_model: + +# optional overrides to the base model configuration +overrides_of_model_config: + # RoPE Scaling https://github.com/huggingface/transformers/pull/24653 + rope_scaling: + type: # linear | dynamic + factor: # float + +# optional overrides to the bnb 4bit quantization configuration +# https://huggingface.co/docs/transformers/main/main_classes/quantization#transformers.BitsAndBytesConfig +bnb_config_kwargs: + # These are default values + llm_int8_has_fp16_weight: false + bnb_4bit_quant_type: nf4 + bnb_4bit_use_double_quant: true + + +# Whether you are training a 4-bit GPTQ quantized model +gptq: true + +# This will attempt to quantize the model down to 8 bits and use adam 8 bit optimizer +load_in_8bit: true +# Use bitsandbytes 4 bit +load_in_4bit: + +# Use CUDA bf16 +bf16: true # bool or 'full' for `bf16_full_eval`. require >=ampere +# Use CUDA fp16 +fp16: true +# Use CUDA tf32 +tf32: true # require >=ampere + +# No AMP (automatic mixed precision) +bfloat16: true # require >=ampere +float16: true + +# Limit the memory for all available GPUs to this amount (if an integer, expressed in gigabytes); default: unset +gpu_memory_limit: 20GiB +# Do the LoRA/PEFT loading on CPU -- this is required if the base model is so large it takes up most or all of the available GPU VRAM, e.g. during a model and LoRA merge +lora_on_cpu: true + +# A list of one or more datasets to finetune the model with +datasets: + # HuggingFace dataset repo | s3://,gs:// path | "json" for local dataset, make sure to fill data_files + - path: vicgalle/alpaca-gpt4 + # The type of prompt to use for training. 
[alpaca, sharegpt, gpteacher, oasst, reflection] + type: alpaca # format | format: (chat/instruct) | .load_ + ds_type: # Optional[str] (json|arrow|parquet|text|csv) defines the datatype when path is a file + data_files: # Optional[str] path to source data files + shards: # Optional[int] number of shards to split data into + name: # Optional[str] name of dataset configuration to load + train_on_split: train # Optional[str] name of dataset split to load from + + # Optional[str] fastchat conversation type, only used with type: sharegpt + conversation: # Options (see Conversation 'name'): https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py + field_human: # Optional[str]. Human key to use for conversation. + field_model: # Optional[str]. Assistant key to use for conversation. + # Add additional keys from your dataset as input or output roles + roles: + input: # Optional[List[str]]. These will be masked based on train_on_input + output: # Optional[List[str]]. + + # Custom user instruction prompt + - path: repo + type: + # The below are defaults. only set what's needed if you use a different column name. + system_prompt: "" + system_format: "{system}" + field_system: system + field_instruction: instruction + field_input: input + field_output: output + + # Customizable to be single line or multi-line + # Use {instruction}/{input} as key to be replaced + # 'format' can include {input} + format: |- + User: {instruction} {input} + Assistant: + # 'no_input_format' cannot include {input} + no_input_format: "{instruction} " + + # For `completion` datsets only, uses the provided field instead of `text` column + field: + +# If false, the datasets will not be shuffled and will keep their original order in `datasets`. +# The same applies to the `test_datasets` option and the `pretraining_dataset` option. Default is true. +shuffle_merged_datasets: true + +# A list of one or more datasets to eval the model with. +# You can use either test_datasets, or val_set_size, but not both. +test_datasets: + - path: /workspace/data/eval.jsonl + ds_type: json + # You need to specify a split. For "json" datasets the default split is called "train". + split: train + type: completion + data_files: + - /workspace/data/eval.jsonl + +# use RL training: 'dpo', 'ipo', 'kto_pair' +rl: + +# Saves the desired chat template to the tokenizer_config.json for easier inferencing +# Currently supports chatml and inst (mistral/mixtral) +chat_template: chatml +# Changes the default system message +default_system_message: You are a helpful assistant. Please give a long and detailed answer. # Currently only supports chatml. +# Axolotl attempts to save the dataset as an arrow after packing the data together so +# subsequent training attempts load faster, relative path +dataset_prepared_path: data/last_run_prepared +# Push prepared dataset to hub +push_dataset_to_hub: # repo path +# The maximum number of processes to use while preprocessing your input dataset. This defaults to `os.cpu_count()` +# if not set. +dataset_processes: # defaults to os.cpu_count() if not set +# Keep dataset in memory while preprocessing +# Only needed if cached dataset is taking too much storage +dataset_keep_in_memory: +# push checkpoints to hub +hub_model_id: # private repo path to push finetuned model +# how to push checkpoints to hub +# https://huggingface.co/docs/transformers/v4.31.0/en/main_classes/trainer#transformers.TrainingArguments.hub_strategy +hub_strategy: +# Whether to use hf `use_auth_token` for loading datasets. 
Useful for fetching private datasets +# Required to be true when used in combination with `push_dataset_to_hub` +hf_use_auth_token: # boolean +# How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc. 0 for no eval. +val_set_size: 0.04 +# Num shards for whole dataset +dataset_shard_num: +# Index of shard to use for whole dataset +dataset_shard_idx: + +# The maximum length of an input to train with, this should typically be less than 2048 +# as most models have a token/context limit of 2048 +sequence_len: 2048 +# Pad inputs so each step uses constant sized buffers +# This will reduce memory fragmentation and may prevent OOMs, by re-using memory more efficiently +pad_to_sequence_len: +# Use efficient multi-packing with block diagonal attention and per sequence position_ids. Recommend set to 'true' +sample_packing: +# Set to 'false' if getting errors during eval with sample_packing on. +eval_sample_packing: +# You can set these packing optimizations AFTER starting a training at least once. +# The trainer will provide recommended values for these values. +sample_packing_eff_est: +total_num_tokens: + +# Passed through to transformers when loading the model when launched without accelerate +# Use `sequential` when training w/ model parallelism to limit memory +device_map: +# Defines the max memory usage per gpu on the system. Passed through to transformers when loading the model. +max_memory: + +# If you want to use 'lora' or 'qlora' or leave blank to train all parameters in original model +adapter: lora +# If you already have a lora model trained that you want to load, put that here. +# This means after training, if you want to test the model, you should set this to the value of `output_dir`. +# Note that if you merge an adapter to the base model, a new subdirectory `merged` will be created under the `output_dir`. +lora_model_dir: + +# LoRA hyperparameters +# For more details about the following options, see: +# https://www.anyscale.com/blog/fine-tuning-llms-lora-or-full-parameter-an-in-depth-analysis-with-llama-2 +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj +# - k_proj +# - o_proj +# - gate_proj +# - down_proj +# - up_proj +lora_target_linear: # If true, will target all linear modules +peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers + +# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens. +# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models. +# `embed_tokens` converts tokens to embeddings, and `lm_head` converts embeddings to token probabilities. 
+# https://github.com/huggingface/peft/issues/334#issuecomment-1561727994 +lora_modules_to_save: +# - embed_tokens +# - lm_head + +lora_fan_in_fan_out: false + +peft: + # Configuration options for loftq initialization for LoRA + # https://huggingface.co/docs/peft/developer_guides/quantization#loftq-initialization + loftq_config: + loftq_bits: # typically 4 bits + +# ReLoRA configuration +# Must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed +relora_steps: # Number of steps per ReLoRA restart +relora_warmup_steps: # Number of per-restart warmup steps +relora_anneal_steps: # Number of anneal steps for each relora cycle +relora_prune_ratio: # threshold for optimizer magnitude when pruning +relora_cpu_offload: # True to perform lora weight merges on cpu during restarts, for modest gpu memory savings + +# wandb configuration if you're using it +# Make sure your `WANDB_API_KEY` environment variable is set (recommended) or you login to wandb with `wandb login`. +wandb_mode: # "offline" to save run metadata locally and not sync to the server, "disabled" to turn off wandb +wandb_project: # Your wandb project name +wandb_entity: # A wandb Team name if using a Team +wandb_watch: +wandb_name: # Set the name of your wandb run +wandb_run_id: # Set the ID of your wandb run +wandb_log_model: # "checkpoint" to log model to wandb Artifacts every `save_steps` or "end" to log only at the end of training + +# mlflow configuration if you're using it +mlflow_tracking_uri: # URI to mlflow +mlflow_experiment_name: # Your experiment name +hf_mlflow_log_artifacts: # set to true to copy each saved checkpoint on each save to mlflow artifact registry + +# Where to save the full-finetuned model to +output_dir: ./completed-model + +# Whether to use torch.compile and which backend to use +torch_compile: # bool +torch_compile_backend: # Optional[str] + +# Training hyperparameters + +# If greater than 1, backpropagation will be skipped and the gradients will be accumulated for the given number of steps. +gradient_accumulation_steps: 1 +# The number of samples to include in each batch. This is the number of samples sent to each GPU. +micro_batch_size: 2 +eval_batch_size: +num_epochs: 4 +warmup_steps: 100 # cannot use with warmup_ratio +warmup_ratio: 0.05 # cannot use with warmup_steps +learning_rate: 0.00003 +lr_quadratic_warmup: +logging_steps: +eval_steps: # Leave empty to eval at each epoch, integers for every N steps. decimal for fraction of total steps +evals_per_epoch: # number of times per epoch to run evals, mutually exclusive with eval_steps +save_strategy: # Set to `no` to skip checkpoint saves +save_steps: # Leave empty to save at each epoch +saves_per_epoch: # number of times per epoch to save a checkpoint, mutually exclusive with save_steps +save_total_limit: # Checkpoints saved at a time +# Maximum number of iterations to train for. It precedes num_epochs which means that +# if both are set, num_epochs will not be guaranteed. +# e.g., when 1 epoch is 1000 steps => `num_epochs: 2` and `max_steps: 100` will train for 100 steps +max_steps: + +eval_table_size: # Approximate number of predictions sent to wandb depending on batch size. Enabled above 0. Default is 0 +eval_max_new_tokens: # Total number of tokens generated for predictions sent to wandb. Default is 128 +eval_causal_lm_metrics: # HF evaluate metrics used during evaluation. 
Default is ["sacrebleu", "comet", "ter", chrf] + +loss_watchdog_threshold: # High loss value, indicating the learning has broken down (a good estimate is ~2 times the loss at the start of training) +loss_watchdog_patience: # Number of high-loss steps in a row before the trainer aborts (default: 3) + +# Save model as safetensors (require safetensors package) +save_safetensors: + +# Whether to mask out or include the human's prompt from the training labels +train_on_inputs: false +# Group similarly sized data to minimize padding. +# May be slower to start, as it must download and sort the entire dataset. +# Note that training loss may have an oscillating pattern with this enabled. +group_by_length: false + +# Whether to use gradient checkpointing https://huggingface.co/docs/transformers/v4.18.0/en/performance#gradient-checkpointing +gradient_checkpointing: false +# additional kwargs to pass to the trainer for gradient checkpointing +# gradient_checkpointing_kwargs: +# use_reentrant: true + +# Stop training after this many evaluation losses have increased in a row +# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback +early_stopping_patience: 3 + +# Specify a scheduler and kwargs to use with the optimizer +lr_scheduler: # 'one_cycle' | 'log_sweep' | empty for cosine +lr_scheduler_kwargs: +cosine_min_lr_ratio: # decay lr to some percentage of the peak lr, e.g. cosine_min_lr_ratio=0.1 for 10% of peak lr +cosine_constant_lr_ratio: # freeze lr at some percentage of the step, e.g. cosine_constant_lr_ratio=0.8 means start cosine_min_lr at 80% of training step (https://arxiv.org/pdf/2308.04014.pdf) + +# For one_cycle optim +lr_div_factor: # Learning rate div factor + +# Specify optimizer +# Valid values are driven by the Transformers OptimizerNames class, see: +# https://github.com/huggingface/transformers/blob/95b374952dc27d8511541d6f5a4e22c9ec11fb24/src/transformers/training_args.py#L134 +# +# Note that not all optimizers may be available in your environment, ex: 'adamw_anyprecision' is part of +# torchdistx, 'adamw_bnb_8bit' is part of bnb.optim.Adam8bit, etc. When in doubt, it is recommended to start with the optimizer used +# in the examples/ for your model and fine-tuning use case. +# +# Valid values for 'optimizer' include: +# - adamw_hf +# - adamw_torch +# - adamw_torch_fused +# - adamw_torch_xla +# - adamw_apex_fused +# - adafactor +# - adamw_anyprecision +# - sgd +# - adagrad +# - adamw_bnb_8bit +# - lion_8bit +# - lion_32bit +# - paged_adamw_32bit +# - paged_adamw_8bit +# - paged_lion_32bit +# - paged_lion_8bit +# - galore_adamw +# - galore_adamw_8bit +# - galore_adafactor +# - galore_adamw_layerwise +# - galore_adamw_8bit_layerwise +# - galore_adafactor_layerwise +optimizer: +# Dictionary of arguments to pass to the optimizer +optim_args: +# For Galore Optimizers the following optim_args are available +# rank: # type: int +# update_proj_gap # type: int +# scale # type: float +# proj_type: # type: str, default = std + +# The target modules to optimize, i.e. 
the module names that you would like to train, right now this is used only for GaLore algorithm +optim_target_modules: +# - self_attn # for llama +# - mlp + +# Specify weight decay +weight_decay: +# adamw hyperparams +adam_beta1: +adam_beta2: +adam_epsilon: +# Gradient clipping max norm +max_grad_norm: + +# Augmentation techniques +# NEFT https://arxiv.org/abs/2310.05914, set this to a number (paper default is 5) to add noise to embeddings +# currently only supported on Llama and Mistral +neftune_noise_alpha: + +# Whether to bettertransformers +flash_optimum: +# Whether to use xformers attention patch https://github.com/facebookresearch/xformers: +xformers_attention: +# Whether to use flash attention patch https://github.com/Dao-AILab/flash-attention: +flash_attention: +flash_attn_cross_entropy: # Whether to use flash-attention cross entropy implementation - advanced use only +flash_attn_rms_norm: # Whether to use flash-attention rms norm implementation - advanced use only +flash_attn_fuse_qkv: # Whether to fuse QKV into a single operation +flash_attn_fuse_mlp: # Whether to fuse part of the MLP into a single operation +# Whether to use scaled-dot-product attention +# https://pytorch.org/docs/stable/generated/torch.nn.functional.scaled_dot_product_attention.html +sdp_attention: +# Shifted-sparse attention (only llama) - https://arxiv.org/pdf/2309.12307.pdf +s2_attention: +# Resume from a specific checkpoint dir +resume_from_checkpoint: +# If resume_from_checkpoint isn't set and you simply want it to start where it left off. +# Be careful with this being turned on between different models. +auto_resume_from_checkpoints: false + +# Don't mess with this, it's here for accelerate and torchrun +local_rank: + +# Add or change special tokens. +# If you add tokens here, you don't need to add them to the `tokens` list. +special_tokens: + # bos_token: "" + # eos_token: "" + # unk_token: "" + +# Add extra tokens. +tokens: + +# FSDP +fsdp: +fsdp_config: + +# Deepspeed config path. e.g., deepspeed_configs/zero3.json +deepspeed: + +# Advanced DDP Arguments +ddp_timeout: +ddp_bucket_cap_mb: +ddp_broadcast_buffers: + +# Path to torch distx for optim 'adamw_anyprecision' +torchdistx_path: + +# Set to HF dataset for type: 'completion' for streaming instead of pre-tokenize +pretraining_dataset: + +# Debug mode +debug: + +# Seed +seed: + +# Allow overwrite yml config using from cli +strict: +``` + +
+ +
+ Understanding of batch size and gradient accumulation steps +
+Gradient accumulation means accumulating gradients over several mini-batches and updating the model weights afterward. When the samples in each batch are diverse, this technique doesn't significantly impact learning. + +This method allows for effective training with larger effective batch sizes without needing proportionally larger memory. Here's why: + +1. **Memory Consumption with Batch Size**: The primary reason increasing the batch size impacts memory is due to the storage requirements for intermediate activations. When you forward propagate a batch through a network, you have to store the activations at each layer for each sample in the batch, because these activations are used during backpropagation to compute gradients. Therefore, larger batches mean more activations, leading to greater GPU memory consumption. + +2. **Gradient Accumulation**: With gradient accumulation, you're effectively simulating a larger batch size by accumulating gradients over several smaller batches (or micro-batches). However, at any given time, you're only forward and backward propagating a micro-batch. This means you only store activations for the micro-batch, not the full accumulated batch. As a result, you can simulate the effect of a larger batch size without the memory cost of storing activations for a large batch. + +**Example 1:** +Micro batch size: 3 +Gradient accumulation steps: 2 +Number of GPUs: 3 +Total batch size = 3 * 2 * 3 = 18 + +``` +| GPU 1 | GPU 2 | GPU 3 | +|----------------|----------------|----------------| +| S1, S2, S3 | S4, S5, S6 | S7, S8, S9 | +| e1, e2, e3 | e4, e5, e6 | e7, e8, e9 | +|----------------|----------------|----------------| +| β†’ (accumulate) | β†’ (accumulate) | β†’ (accumulate) | +|----------------|----------------|----------------| +| S10, S11, S12 | S13, S14, S15 | S16, S17, S18 | +| e10, e11, e12 | e13, e14, e15 | e16, e17, e18 | +|----------------|----------------|----------------| +| β†’ (apply) | β†’ (apply) | β†’ (apply) | + +Accumulated gradient for the weight w1 after the second iteration (considering all GPUs): +Total gradient for w1 = e1 + e2 + e3 + e4 + e5 + e6 + e7 + e8 + e9 + e10 + e11 + e12 + e13 + e14 + e15 + e16 + e17 + e18 + +Weight update for w1: +w1_new = w1_old - learning rate x (Total gradient for w1 / 18) +``` + +**Example 2:** +Micro batch size: 2 +Gradient accumulation steps: 1 +Number of GPUs: 3 +Total batch size = 2 * 1 * 3 = 6 + +``` +| GPU 1 | GPU 2 | GPU 3 | +|-----------|-----------|-----------| +| S1, S2 | S3, S4 | S5, S6 | +| e1, e2 | e3, e4 | e5, e6 | +|-----------|-----------|-----------| +| β†’ (apply) | β†’ (apply) | β†’ (apply) | + +Accumulated gradient for the weight w1 (considering all GPUs): +Total gradient for w1 = e1 + e2 + e3 + e4 + e5 + e6 + +Weight update for w1: +w1_new = w1_old - learning rate Γ— (Total gradient for w1 / 6) +``` + +
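+
+In axolotl terms, the quantities in these examples map directly onto config keys: `micro_batch_size` is the per-GPU mini-batch and `gradient_accumulation_steps` is how many of those mini-batches are accumulated before each optimizer step, while the GPU count typically comes from your `accelerate` launch configuration rather than from the config file. A sketch of the settings matching Example 1:
+
+```yaml
+# Example 1 above: 3 samples per GPU, accumulated over 2 steps, on 3 GPUs
+# effective total batch size = 3 * 2 * 3 = 18
+micro_batch_size: 3
+gradient_accumulation_steps: 2
+```
+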
+ +### Train + +Run +```bash +accelerate launch -m axolotl.cli.train your_config.yml +``` + +> [!TIP] +> You can also reference a config file that is hosted on a public URL, for example `accelerate launch -m axolotl.cli.train https://yourdomain.com/your_config.yml` + +#### Preprocess dataset + +You can optionally pre-tokenize dataset with the following before finetuning. +This is recommended for large datasets. + +- Set `dataset_prepared_path:` to a local folder for saving and loading pre-tokenized dataset. +- (Optional): Set `push_dataset_to_hub: hf_user/repo` to push it to Huggingface. +- (Optional): Use `--debug` to see preprocessed examples. + +```bash +python -m axolotl.cli.preprocess your_config.yml +``` + +#### Multi-GPU + +Below are the options available in axolotl for training with multiple GPUs. Note that DeepSpeed +is the recommended multi-GPU option currently because FSDP may experience +[loss instability](https://github.com/huggingface/transformers/issues/26498). + +##### DeepSpeed + +Deepspeed is an optimization suite for multi-gpu systems allowing you to train much larger models than you +might typically be able to fit into your GPU's VRAM. More information about the various optimization types +for deepspeed is available at https://huggingface.co/docs/accelerate/main/en/usage_guides/deepspeed#what-is-integrated + +We provide several default deepspeed JSON configurations for ZeRO stage 1, 2, and 3. + +```yaml +deepspeed: deepspeed_configs/zero1.json +``` + +```shell +accelerate launch -m axolotl.cli.train examples/llama-2/config.py --deepspeed deepspeed_configs/zero1.json +``` + +##### FSDP + +- llama FSDP +```yaml +fsdp: + - full_shard + - auto_wrap +fsdp_config: + fsdp_offload_params: true + fsdp_state_dict_type: FULL_STATE_DICT + fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer +``` + +##### FSDP + QLoRA + +Axolotl supports training with FSDP and QLoRA, see [these docs](docs/fsdp_qlora.qmd) for more information. + +##### Weights & Biases Logging + +Make sure your `WANDB_API_KEY` environment variable is set (recommended) or you login to wandb with `wandb login`. + +- wandb options +```yaml +wandb_mode: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +``` + +##### Special Tokens + +It is important to have special tokens like delimiters, end-of-sequence, beginning-of-sequence in your tokenizer's vocabulary. This will help you avoid tokenization issues and help your model train better. You can do this in axolotl like this: + +```yml +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" +tokens: # these are delimiters + - "<|im_start|>" + - "<|im_end|>" +``` + +When you include these tokens in your axolotl config, axolotl adds these tokens to the tokenizer's vocabulary. + +### Inference Playground + +Axolotl allows you to load your model in an interactive terminal playground for quick experimentation. +The config file is the same config file used for training. 
+
+Pass the appropriate flag to the inference command, depending upon what kind of model was trained:
+
+- Pretrained LORA:
+  ```bash
+  python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
+  ```
+- Full weights finetune:
+  ```bash
+  python -m axolotl.cli.inference examples/your_config.yml --base_model="./completed-model"
+  ```
+- Full weights finetune w/ a prompt from a text file:
+  ```bash
+  cat /tmp/prompt.txt | python -m axolotl.cli.inference examples/your_config.yml \
+    --base_model="./completed-model" --prompter=None --load_in_8bit=True
+  ```
+- With gradio hosting:
+  ```bash
+  python -m axolotl.cli.inference examples/your_config.yml --gradio
+  ```
+
+Please use `--sample_packing False` if you have it on and receive an error similar to the one below:
+
+> RuntimeError: stack expects each tensor to be equal size, but got [1, 32, 1, 128] at entry 0 and [1, 32, 8, 128] at entry 1
+
+### Merge LORA to base
+
+The following command will merge your LORA adapter with your base model. You can optionally pass the argument `--lora_model_dir` to specify the directory where your LORA adapter was saved, otherwise, this will be inferred from `output_dir` in your axolotl config file. The merged model is saved in the sub-directory `{lora_model_dir}/merged`.
+
+```bash
+python3 -m axolotl.cli.merge_lora your_config.yml --lora_model_dir="./completed-model"
+```
+
+You may need to use the `gpu_memory_limit` and/or `lora_on_cpu` config options to avoid running out of memory. If you still run out of CUDA memory, you can try to merge in system RAM with
+
+```bash
+CUDA_VISIBLE_DEVICES="" python3 -m axolotl.cli.merge_lora ...
+```
+
+although this will be very slow, and using the config options above is recommended instead.
+
+## Common Errors 🧰
+
+See also the [FAQs](./docs/faq.qmd) and [debugging guide](docs/debugging.qmd).
+
+> If you encounter a 'Cuda out of memory' error, it means your GPU ran out of memory during the training process. Here's how to resolve it:
+
+Please reduce any of the below
+  - `micro_batch_size`
+  - `eval_batch_size`
+  - `gradient_accumulation_steps`
+  - `sequence_len`
+
+If it does not help, try running without deepspeed and without accelerate (replace "accelerate launch" with "python") in the command.
+
+Using adamw_bnb_8bit might also save you some memory.
+
+> `failed (exitcode: -9)`
+
+Usually means your system has run out of system memory.
+Similarly, you should consider reducing the same settings as when you run out of VRAM.
+Additionally, look into upgrading your system RAM which should be simpler than GPU upgrades.
+
+> RuntimeError: expected scalar type Float but found Half
+
+Try setting `fp16: true`
+
+> NotImplementedError: No operator found for `memory_efficient_attention_forward` ...
+
+Try to turn off xformers.
+
+> accelerate config missing
+
+It's safe to ignore it.
+
+> NCCL Timeouts during training
+
+See the [NCCL](docs/nccl.qmd) guide.
+
+
+### Tokenization Mismatch b/w Inference & Training
+
+For many formats, Axolotl constructs prompts by concatenating token ids _after_ tokenizing strings. The reason for concatenating token ids rather than operating on strings is to maintain precise accounting for attention masks.
+
+If you decode a prompt constructed by axolotl, you might see spaces between tokens (or lack thereof) that you do not expect, especially around delimiters and special tokens. When you are starting out with a new format, you should always do the following:
+
+1. 
Materialize some data using `python -m axolotl.cli.preprocess your_config.yml --debug`, and then decode the first few rows with your model's tokenizer.
+2. During inference, right before you pass a tensor of token ids to your model, decode these tokens back into a string.
+3. Make sure the inference string from #2 looks **exactly** like the data you fine-tuned on from #1, including spaces and new lines. If they aren't the same, adjust your inference server accordingly.
+4. As an additional troubleshooting step, you can compare the token ids from steps 1 and 2 to make sure they are identical.
+
+Having misalignment between your prompts during training and inference can cause models to perform very poorly, so it is worth checking this. See [this blog post](https://hamel.dev/notes/llm/05_tokenizer_gotchas.html) for a concrete example.
+
+## Debugging Axolotl
+
+See [this debugging guide](docs/debugging.qmd) for tips on debugging Axolotl, along with an example configuration for debugging with VSCode.
+
+## Need help? πŸ™‹
+
+Join our [Discord server](https://discord.gg/HhrNrHJPRb) where our community members can help you.
+
+Need dedicated support? Please contact us at [βœ‰οΈwing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org) for dedicated support options.
+
+## Badge ❀🏷️
+
+Building something cool with Axolotl? Consider adding a badge to your model card.
+
+```markdown
+[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl)
+```
+
+[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl)
+
+## Community Showcase
+
+Check out some of the projects and models that have been built using Axolotl! Have a model you'd like to add to our Community Showcase? Open a PR with your model.
+
+Open Access AI Collective
+- [Minotaur 13b](https://huggingface.co/openaccess-ai-collective/minotaur-13b-fixed)
+- [Manticore 13b](https://huggingface.co/openaccess-ai-collective/manticore-13b)
+- [Hippogriff 30b](https://huggingface.co/openaccess-ai-collective/hippogriff-30b-chat)
+
+PocketDoc Labs
+- [Dan's PersonalityEngine 13b LoRA](https://huggingface.co/PocketDoc/Dans-PersonalityEngine-13b-LoRA)
+
+## Contributing 🀝
+
+Please read the [contributing guide](./.github/CONTRIBUTING.md).
+
+Bugs? Please check the [open issues](https://github.com/OpenAccess-AI-Collective/axolotl/issues/bug), otherwise create a new Issue.
+
+PRs are **greatly welcome**!
+
+Please run the following to set up your environment
+```bash
+git clone https://github.com/OpenAccess-AI-Collective/axolotl
+cd axolotl
+
+pip3 install packaging
+pip3 install -e '.[flash-attn,deepspeed]'
+
+pip3 install -r requirements-dev.txt -r requirements-tests.txt
+pre-commit install
+
+# test
+pytest tests/
+
+# optional: run against all files
+pre-commit run --all-files
+```
+
+Thanks to all of our contributors to date. Help drive open source AI progress forward by contributing to Axolotl.
+
+  contributor chart by https://contrib.rocks
+
+
+## Sponsors 🀝❀
+
+OpenAccess AI Collective is run by volunteer contributors such as [winglian](https://github.com/winglian),
+[NanoCode012](https://github.com/NanoCode012), [tmm1](https://github.com/tmm1),
+[mhenrichsen](https://github.com/mhenrichsen), [casper-hansen](https://github.com/casper-hansen),
+[hamelsmu](https://github.com/hamelsmu) and many more who help us accelerate forward by fixing bugs, answering
+community questions and implementing new features.
Axolotl needs donations from sponsors for the compute needed to +run our unit & integration tests, troubleshooting community issues, and providing bounties. If you love axolotl, +consider sponsoring the project via [GitHub Sponsors](https://github.com/sponsors/OpenAccess-AI-Collective), +[Ko-fi](https://ko-fi.com/axolotl_ai) or reach out directly to +[wing@openaccessaicollective.org](mailto:wing@openaccessaicollective.org). + +--- + +#### πŸ’Ž Diamond Sponsors - [Contact directly](mailto:wing@openaccessaicollective.org) + +--- + +#### πŸ₯‡ Gold Sponsors - $5000/mo + +--- + +#### πŸ₯ˆ Silver Sponsors - $1000/mo + +--- + +#### πŸ₯‰ Bronze Sponsors - $500/mo + + - [JarvisLabs.ai](https://jarvislabs.ai) + +--- diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000000000000000000000000000000000000..2002bbbaf1b8781e9f632856c032702151895bb2 --- /dev/null +++ b/TODO.md @@ -0,0 +1,10 @@ +# todo list + +- [] Validation of parameters for combinations that won't work + + + +## things that are known not to work + +- FSDP offload and gradient_checkpointing - https://github.com/pytorch/pytorch/issues/82203 +- adamw_bnb_8bit doesn't play well with FSDP offload diff --git a/_quarto.yml b/_quarto.yml new file mode 100644 index 0000000000000000000000000000000000000000..31aa90398e7a7e1bf6d5249dcad0271434c23b4c --- /dev/null +++ b/_quarto.yml @@ -0,0 +1,51 @@ +project: + type: website + +website: + title: "Axolotl" + description: "Fine-tuning" + favicon: favicon.jpg + navbar: + title: Axolotl + background: dark + pinned: false + collapse: false + tools: + - icon: twitter + href: https://twitter.com/axolotl_ai + - icon: github + href: https://github.com/OpenAccess-AI-Collective/axolotl/ + - icon: discord + href: https://discord.gg/7m9sfhzaf3 + + sidebar: + pinned: true + collapse-level: 2 + style: docked + contents: + - text: Home + href: index.qmd + - section: "How-To Guides" + contents: + # TODO Edit folder structure after we have more docs. + - docs/debugging.qmd + - docs/multipack.qmd + - docs/fdsp_qlora.qmd + - docs/input_output.qmd + - docs/rlhf.qmd + - docs/nccl.qmd + - docs/mac.qmd + - docs/multi-node.qmd + - section: "Reference" + contents: + - docs/config.qmd + - docs/faq.qmd + + + + +format: + html: + theme: materia + css: styles.css + toc: true diff --git a/axol/.gitignore b/axol/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ede80b85f0f30f7d19ce1a46d6b421fac3c1f82f --- /dev/null +++ b/axol/.gitignore @@ -0,0 +1,2 @@ +# created by virtualenv automatically +* diff --git a/axol/bin/activate b/axol/bin/activate new file mode 100644 index 0000000000000000000000000000000000000000..32bb382debead3ff4a36cc1d40ea2dfda8ba2df8 --- /dev/null +++ b/axol/bin/activate @@ -0,0 +1,87 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + + +if [ "${BASH_SOURCE-}" = "$0" ]; then + echo "You must source this script: \$ source $0" >&2 + exit 33 +fi + +deactivate () { + unset -f pydoc >/dev/null 2>&1 || true + + # reset old environment variables + # ! [ -z ${VAR+_} ] returns true if VAR is declared at all + if ! [ -z "${_OLD_VIRTUAL_PATH:+_}" ] ; then + PATH="$_OLD_VIRTUAL_PATH" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if ! [ -z "${_OLD_VIRTUAL_PYTHONHOME+_}" ] ; then + PYTHONHOME="$_OLD_VIRTUAL_PYTHONHOME" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # The hash command must be called to get it to forget past + # commands. 
Without forgetting past commands the $PATH changes + # we made may not be respected + hash -r 2>/dev/null + + if ! [ -z "${_OLD_VIRTUAL_PS1+_}" ] ; then + PS1="$_OLD_VIRTUAL_PS1" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV='/workspace/axolotl/axol' +if ([ "$OSTYPE" = "cygwin" ] || [ "$OSTYPE" = "msys" ]) && $(command -v cygpath &> /dev/null) ; then + VIRTUAL_ENV=$(cygpath -u "$VIRTUAL_ENV") +fi +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/bin:$PATH" +export PATH + +if [ "x" != x ] ; then + VIRTUAL_ENV_PROMPT="" +else + VIRTUAL_ENV_PROMPT=$(basename "$VIRTUAL_ENV") +fi +export VIRTUAL_ENV_PROMPT + +# unset PYTHONHOME if set +if ! [ -z "${PYTHONHOME+_}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="$PYTHONHOME" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1-}" + PS1="(${VIRTUAL_ENV_PROMPT}) ${PS1-}" + export PS1 +fi + +# Make sure to unalias pydoc if it's already there +alias pydoc 2>/dev/null >/dev/null && unalias pydoc || true + +pydoc () { + python -m pydoc "$@" +} + +# The hash command must be called to get it to forget past +# commands. Without forgetting past commands the $PATH changes +# we made may not be respected +hash -r 2>/dev/null diff --git a/axol/bin/activate.csh b/axol/bin/activate.csh new file mode 100644 index 0000000000000000000000000000000000000000..66f353d085d89dea30de81e44c630a72ebe9c327 --- /dev/null +++ b/axol/bin/activate.csh @@ -0,0 +1,55 @@ +# This file must be used with "source bin/activate.csh" *from csh*. +# You cannot run it directly. +# Created by Davide Di Blasi . + +set newline='\ +' + +alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH:q" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT:q" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate && unalias pydoc' + +# Unset irrelevant variables. +deactivate nondestructive + +setenv VIRTUAL_ENV '/workspace/axolotl/axol' + +set _OLD_VIRTUAL_PATH="$PATH:q" +setenv PATH "$VIRTUAL_ENV:q/bin:$PATH:q" + + + +if ('' != "") then + setenv VIRTUAL_ENV_PROMPT '' +else + setenv VIRTUAL_ENV_PROMPT "$VIRTUAL_ENV:t:q" +endif + +if ( $?VIRTUAL_ENV_DISABLE_PROMPT ) then + if ( $VIRTUAL_ENV_DISABLE_PROMPT == "" ) then + set do_prompt = "1" + else + set do_prompt = "0" + endif +else + set do_prompt = "1" +endif + +if ( $do_prompt == "1" ) then + # Could be in a non-interactive environment, + # in which case, $prompt is undefined and we wouldn't + # care about the prompt anyway. + if ( $?prompt ) then + set _OLD_VIRTUAL_PROMPT="$prompt:q" + if ( "$prompt:q" =~ *"$newline:q"* ) then + : + else + set prompt = '('"$VIRTUAL_ENV_PROMPT:q"') '"$prompt:q" + endif + endif +endif + +unset env_name +unset do_prompt + +alias pydoc python -m pydoc + +rehash diff --git a/axol/bin/activate.fish b/axol/bin/activate.fish new file mode 100644 index 0000000000000000000000000000000000000000..85f9a69843615da62a8178f408f6c4b0a4935bc5 --- /dev/null +++ b/axol/bin/activate.fish @@ -0,0 +1,103 @@ +# This file must be used using `source bin/activate.fish` *within a running fish ( http://fishshell.com ) session*. +# Do not run it directly. 
+ +function _bashify_path -d "Converts a fish path to something bash can recognize" + set fishy_path $argv + set bashy_path $fishy_path[1] + for path_part in $fishy_path[2..-1] + set bashy_path "$bashy_path:$path_part" + end + echo $bashy_path +end + +function _fishify_path -d "Converts a bash path to something fish can recognize" + echo $argv | tr ':' '\n' +end + +function deactivate -d 'Exit virtualenv mode and return to the normal environment.' + # reset old environment variables + if test -n "$_OLD_VIRTUAL_PATH" + # https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling + if test (echo $FISH_VERSION | head -c 1) -lt 3 + set -gx PATH (_fishify_path "$_OLD_VIRTUAL_PATH") + else + set -gx PATH $_OLD_VIRTUAL_PATH + end + set -e _OLD_VIRTUAL_PATH + end + + if test -n "$_OLD_VIRTUAL_PYTHONHOME" + set -gx PYTHONHOME "$_OLD_VIRTUAL_PYTHONHOME" + set -e _OLD_VIRTUAL_PYTHONHOME + end + + if test -n "$_OLD_FISH_PROMPT_OVERRIDE" + and functions -q _old_fish_prompt + # Set an empty local `$fish_function_path` to allow the removal of `fish_prompt` using `functions -e`. + set -l fish_function_path + + # Erase virtualenv's `fish_prompt` and restore the original. + functions -e fish_prompt + functions -c _old_fish_prompt fish_prompt + functions -e _old_fish_prompt + set -e _OLD_FISH_PROMPT_OVERRIDE + end + + set -e VIRTUAL_ENV + set -e VIRTUAL_ENV_PROMPT + + if test "$argv[1]" != 'nondestructive' + # Self-destruct! + functions -e pydoc + functions -e deactivate + functions -e _bashify_path + functions -e _fishify_path + end +end + +# Unset irrelevant variables. +deactivate nondestructive + +set -gx VIRTUAL_ENV '/workspace/axolotl/axol' + +# https://github.com/fish-shell/fish-shell/issues/436 altered PATH handling +if test (echo $FISH_VERSION | head -c 1) -lt 3 + set -gx _OLD_VIRTUAL_PATH (_bashify_path $PATH) +else + set -gx _OLD_VIRTUAL_PATH $PATH +end +set -gx PATH "$VIRTUAL_ENV"'/bin' $PATH + +# Prompt override provided? +# If not, just use the environment name. +if test -n '' + set -gx VIRTUAL_ENV_PROMPT '' +else + set -gx VIRTUAL_ENV_PROMPT (basename "$VIRTUAL_ENV") +end + +# Unset `$PYTHONHOME` if set. +if set -q PYTHONHOME + set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME + set -e PYTHONHOME +end + +function pydoc + python -m pydoc $argv +end + +if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" + # Copy the current `fish_prompt` function as `_old_fish_prompt`. + functions -c fish_prompt _old_fish_prompt + + function fish_prompt + # Run the user's prompt first; it might depend on (pipe)status. + set -l prompt (_old_fish_prompt) + + printf '(%s) ' $VIRTUAL_ENV_PROMPT + + string join -- \n $prompt # handle multi-line prompts + end + + set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV" +end diff --git a/axol/bin/activate.nu b/axol/bin/activate.nu new file mode 100644 index 0000000000000000000000000000000000000000..caaf36ad37ded1cdfa37c9f3c08bca2a9ac6ba28 --- /dev/null +++ b/axol/bin/activate.nu @@ -0,0 +1,96 @@ +# virtualenv activation module +# Activate with `overlay use activate.nu` +# Deactivate with `deactivate`, as usual +# +# To customize the overlay name, you can call `overlay use activate.nu as foo`, +# but then simply `deactivate` won't work because it is just an alias to hide +# the "activate" overlay. You'd need to call `overlay hide foo` manually. 
+ +export-env { + def is-string [x] { + ($x | describe) == 'string' + } + + def has-env [...names] { + $names | each {|n| + $n in $env + } | all {|i| $i == true} + } + + # Emulates a `test -z`, but btter as it handles e.g 'false' + def is-env-true [name: string] { + if (has-env $name) { + # Try to parse 'true', '0', '1', and fail if not convertible + let parsed = (do -i { $env | get $name | into bool }) + if ($parsed | describe) == 'bool' { + $parsed + } else { + not ($env | get -i $name | is-empty) + } + } else { + false + } + } + + let virtual_env = '/workspace/axolotl/axol' + let bin = 'bin' + + let is_windows = ($nu.os-info.family) == 'windows' + let path_name = (if (has-env 'Path') { + 'Path' + } else { + 'PATH' + } + ) + + let venv_path = ([$virtual_env $bin] | path join) + let new_path = ($env | get $path_name | prepend $venv_path) + + # If there is no default prompt, then use the env name instead + let virtual_env_prompt = (if ('' | is-empty) { + ($virtual_env | path basename) + } else { + '' + }) + + let new_env = { + $path_name : $new_path + VIRTUAL_ENV : $virtual_env + VIRTUAL_ENV_PROMPT : $virtual_env_prompt + } + + let new_env = (if (is-env-true 'VIRTUAL_ENV_DISABLE_PROMPT') { + $new_env + } else { + # Creating the new prompt for the session + let virtual_prefix = $'(char lparen)($virtual_env_prompt)(char rparen) ' + + # Back up the old prompt builder + let old_prompt_command = (if (has-env 'PROMPT_COMMAND') { + $env.PROMPT_COMMAND + } else { + '' + }) + + let new_prompt = (if (has-env 'PROMPT_COMMAND') { + if 'closure' in ($old_prompt_command | describe) { + {|| $'($virtual_prefix)(do $old_prompt_command)' } + } else { + {|| $'($virtual_prefix)($old_prompt_command)' } + } + } else { + {|| $'($virtual_prefix)' } + }) + + $new_env | merge { + PROMPT_COMMAND : $new_prompt + VIRTUAL_PREFIX : $virtual_prefix + } + }) + + # Environment variables that will be loaded as the virtual env + load-env $new_env +} + +export alias pydoc = python -m pydoc +export alias deactivate = overlay hide activate diff --git a/axol/bin/activate.ps1 b/axol/bin/activate.ps1 new file mode 100644 index 0000000000000000000000000000000000000000..47d185320bfabfc2f880e39ec645cac8223e1be6 --- /dev/null +++ b/axol/bin/activate.ps1 @@ -0,0 +1,61 @@ +$script:THIS_PATH = $myinvocation.mycommand.path +$script:BASE_DIR = Split-Path (Resolve-Path "$THIS_PATH/..") -Parent + +function global:deactivate([switch] $NonDestructive) { + if (Test-Path variable:_OLD_VIRTUAL_PATH) { + $env:PATH = $variable:_OLD_VIRTUAL_PATH + Remove-Variable "_OLD_VIRTUAL_PATH" -Scope global + } + + if (Test-Path function:_old_virtual_prompt) { + $function:prompt = $function:_old_virtual_prompt + Remove-Item function:\_old_virtual_prompt + } + + if ($env:VIRTUAL_ENV) { + Remove-Item env:VIRTUAL_ENV -ErrorAction SilentlyContinue + } + + if ($env:VIRTUAL_ENV_PROMPT) { + Remove-Item env:VIRTUAL_ENV_PROMPT -ErrorAction SilentlyContinue + } + + if (!$NonDestructive) { + # Self destruct! 
+ Remove-Item function:deactivate + Remove-Item function:pydoc + } +} + +function global:pydoc { + python -m pydoc $args +} + +# unset irrelevant variables +deactivate -nondestructive + +$VIRTUAL_ENV = $BASE_DIR +$env:VIRTUAL_ENV = $VIRTUAL_ENV + +if ("" -ne "") { + $env:VIRTUAL_ENV_PROMPT = "" +} +else { + $env:VIRTUAL_ENV_PROMPT = $( Split-Path $env:VIRTUAL_ENV -Leaf ) +} + +New-Variable -Scope global -Name _OLD_VIRTUAL_PATH -Value $env:PATH + +$env:PATH = "$env:VIRTUAL_ENV/bin:" + $env:PATH +if (!$env:VIRTUAL_ENV_DISABLE_PROMPT) { + function global:_old_virtual_prompt { + "" + } + $function:_old_virtual_prompt = $function:prompt + + function global:prompt { + # Add the custom prefix to the existing prompt + $previous_prompt_value = & $function:_old_virtual_prompt + ("(" + $env:VIRTUAL_ENV_PROMPT + ") " + $previous_prompt_value) + } +} diff --git a/axol/bin/activate_this.py b/axol/bin/activate_this.py new file mode 100644 index 0000000000000000000000000000000000000000..d3076aeef31beecd2bde4d760a7dd95e9323b3e5 --- /dev/null +++ b/axol/bin/activate_this.py @@ -0,0 +1,37 @@ +""" +Activate virtualenv for current interpreter: + +Use exec(open(this_file).read(), {'__file__': this_file}). + +This can be used when you must use an existing Python interpreter, not the virtualenv bin/python. +""" # noqa: D415 + +from __future__ import annotations + +import os +import site +import sys + +try: + abs_file = os.path.abspath(__file__) +except NameError as exc: + msg = "You must use exec(open(this_file).read(), {'__file__': this_file})" + raise AssertionError(msg) from exc + +bin_dir = os.path.dirname(abs_file) +base = bin_dir[: -len("bin") - 1] # strip away the bin part from the __file__, plus the path separator + +# prepend bin to PATH (this file is inside the bin directory) +os.environ["PATH"] = os.pathsep.join([bin_dir, *os.environ.get("PATH", "").split(os.pathsep)]) +os.environ["VIRTUAL_ENV"] = base # virtual env is right above bin directory +os.environ["VIRTUAL_ENV_PROMPT"] = "" or os.path.basename(base) # noqa: SIM222 + +# add the virtual environments libraries to the host python import mechanism +prev_length = len(sys.path) +for lib in "../lib/python3.10/site-packages".split(os.pathsep): + path = os.path.realpath(os.path.join(bin_dir, lib)) + site.addsitedir(path.decode("utf-8") if "" else path) +sys.path[:] = sys.path[prev_length:] + sys.path[0:prev_length] + +sys.real_prefix = sys.prefix +sys.prefix = base diff --git a/axol/bin/pip b/axol/bin/pip new file mode 100644 index 0000000000000000000000000000000000000000..efc1b879f8aebacd746fed326afecbd09e103155 --- /dev/null +++ b/axol/bin/pip @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/pip-3.10 b/axol/bin/pip-3.10 new file mode 100644 index 0000000000000000000000000000000000000000..efc1b879f8aebacd746fed326afecbd09e103155 --- /dev/null +++ b/axol/bin/pip-3.10 @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/pip3 b/axol/bin/pip3 new file mode 100644 index 0000000000000000000000000000000000000000..efc1b879f8aebacd746fed326afecbd09e103155 --- /dev/null +++ b/axol/bin/pip3 @@ -0,0 +1,8 @@ 
+#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/pip3.10 b/axol/bin/pip3.10 new file mode 100644 index 0000000000000000000000000000000000000000..efc1b879f8aebacd746fed326afecbd09e103155 --- /dev/null +++ b/axol/bin/pip3.10 @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from pip._internal.cli.main import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/python b/axol/bin/python new file mode 100644 index 0000000000000000000000000000000000000000..90ae5eca127019bf91fc218cfbb655166879a507 --- /dev/null +++ b/axol/bin/python @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37ac6a3327a3ef707c6e7df9888ed0b6713ccf4c1af3a7088aedbf87132d08a +size 17215080 diff --git a/axol/bin/python3 b/axol/bin/python3 new file mode 100644 index 0000000000000000000000000000000000000000..90ae5eca127019bf91fc218cfbb655166879a507 --- /dev/null +++ b/axol/bin/python3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37ac6a3327a3ef707c6e7df9888ed0b6713ccf4c1af3a7088aedbf87132d08a +size 17215080 diff --git a/axol/bin/python3.10 b/axol/bin/python3.10 new file mode 100644 index 0000000000000000000000000000000000000000..90ae5eca127019bf91fc218cfbb655166879a507 --- /dev/null +++ b/axol/bin/python3.10 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37ac6a3327a3ef707c6e7df9888ed0b6713ccf4c1af3a7088aedbf87132d08a +size 17215080 diff --git a/axol/bin/wheel b/axol/bin/wheel new file mode 100644 index 0000000000000000000000000000000000000000..30fec20ec51de9465fe8ebaafab31ca0cb15f634 --- /dev/null +++ b/axol/bin/wheel @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from wheel.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/wheel-3.10 b/axol/bin/wheel-3.10 new file mode 100644 index 0000000000000000000000000000000000000000..30fec20ec51de9465fe8ebaafab31ca0cb15f634 --- /dev/null +++ b/axol/bin/wheel-3.10 @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from wheel.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/wheel3 b/axol/bin/wheel3 new file mode 100644 index 0000000000000000000000000000000000000000..30fec20ec51de9465fe8ebaafab31ca0cb15f634 --- /dev/null +++ b/axol/bin/wheel3 @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from wheel.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/bin/wheel3.10 b/axol/bin/wheel3.10 new file mode 100644 index 0000000000000000000000000000000000000000..30fec20ec51de9465fe8ebaafab31ca0cb15f634 --- /dev/null +++ b/axol/bin/wheel3.10 @@ -0,0 +1,8 @@ +#!/workspace/axolotl/axol/bin/python +# -*- coding: utf-8 -*- +import re +import sys +from wheel.cli import main +if __name__ == '__main__': + sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0]) + sys.exit(main()) diff --git a/axol/pyvenv.cfg 
b/axol/pyvenv.cfg new file mode 100644 index 0000000000000000000000000000000000000000..ec1a4eb8e541d7b095d650b91dcc4a3ec4c31ec3 --- /dev/null +++ b/axol/pyvenv.cfg @@ -0,0 +1,8 @@ +home = /opt/conda/envs/axol/bin +implementation = CPython +version_info = 3.10.14.final.0 +virtualenv = 20.25.1 +include-system-site-packages = false +base-prefix = /opt/conda/envs/axol +base-exec-prefix = /opt/conda/envs/axol +base-executable = /opt/conda/envs/axol/bin/python3.10 diff --git a/cicd/Dockerfile.jinja b/cicd/Dockerfile.jinja new file mode 100644 index 0000000000000000000000000000000000000000..19d7a09de5592790bac885618c9cc3e377da298e --- /dev/null +++ b/cicd/Dockerfile.jinja @@ -0,0 +1,39 @@ +FROM winglian/axolotl-base:{{ BASE_TAG }} + +ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" +ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}" +ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}" +ENV CUDA="{{ CUDA }}" +ENV BNB_CUDA_VERSION="{{ CUDA }}" +ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}" +ENV GITHUB_REF="{{ GITHUB_REF }}" +ENV GITHUB_SHA="{{ GITHUB_SHA }}" + +RUN apt-get update && \ + apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev + +WORKDIR /workspace + +RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git + +WORKDIR /workspace/axolotl + +RUN git fetch origin +$GITHUB_REF && \ + git checkout FETCH_HEAD + +# If AXOLOTL_EXTRAS is set, append it in brackets +RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,galore,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + else \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,galore] $AXOLOTL_ARGS; \ + fi + +# So we can test the Docker image +RUN pip install pytest + +# fix so that git fetch/pull from remote works +RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \ + git config --get remote.origin.fetch + +# helper for huggingface-login cli +RUN git config --global credential.helper store diff --git a/cicd/cicd.sh b/cicd/cicd.sh new file mode 100644 index 0000000000000000000000000000000000000000..fa2049b6bd685e4fa59c29e49bf8699f80c4acc3 --- /dev/null +++ b/cicd/cicd.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +pytest --ignore=tests/e2e/ /workspace/axolotl/tests/ +pytest /workspace/axolotl/tests/e2e/patched/ +pytest --ignore=tests/e2e/patched/ /workspace/axolotl/tests/e2e/ diff --git a/cicd/tests.py b/cicd/tests.py new file mode 100644 index 0000000000000000000000000000000000000000..bfbdb7b90acbe203dfdf415f29df794996f00eff --- /dev/null +++ b/cicd/tests.py @@ -0,0 +1,75 @@ +""" + modal application to run axolotl gpu tests in Modal + """ +import os +import pathlib +import tempfile + +import jinja2 +import modal +from jinja2 import select_autoescape +from modal import Image, Stub + +cicd_path = pathlib.Path(__file__).parent.resolve() + +template_loader = jinja2.FileSystemLoader(searchpath=cicd_path) +template_env = jinja2.Environment( + loader=template_loader, autoescape=select_autoescape() +) +df_template = template_env.get_template("Dockerfile.jinja") + +df_args = { + "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), + "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), + "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.0.1"), + "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.10-cu118-2.0.1"), + "CUDA": os.environ.get("CUDA", "118"), + "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), + "GITHUB_SHA": os.environ.get("GITHUB_SHA", ""), +} + +dockerfile_contents = df_template.render(**df_args) + +temp_dir = tempfile.mkdtemp() +with 
open(pathlib.Path(temp_dir) / "Dockerfile", "w", encoding="utf-8") as f: + f.write(dockerfile_contents) + +cicd_image = ( + Image.from_dockerfile( + pathlib.Path(temp_dir) / "Dockerfile", + force_build=True, + gpu="A10G", + ) + .env(df_args) + .pip_install("fastapi==0.110.0", "pydantic==2.6.3") +) + +stub = Stub("Axolotl CI/CD", secrets=[]) + + +N_GPUS = int(os.environ.get("N_GPUS", 1)) +GPU_CONFIG = modal.gpu.A10G(count=N_GPUS) + + +def run_cmd(cmd: str, run_folder: str): + import subprocess # nosec + + # Propagate errors from subprocess. + if exit_code := subprocess.call(cmd.split(), cwd=run_folder): # nosec + exit(exit_code) # pylint: disable=consider-using-sys-exit + + +@stub.function( + image=cicd_image, + gpu=GPU_CONFIG, + timeout=45 * 60, + cpu=8.0, + memory=131072, +) +def cicd_pytest(): + run_cmd("./cicd/cicd.sh", "/workspace/axolotl") + + +@stub.local_entrypoint() +def main(): + cicd_pytest.remote() diff --git a/config-2.yml b/config-2.yml new file mode 100644 index 0000000000000000000000000000000000000000..561054c8602ffa3d5792074df4826a58e78ec828 --- /dev/null +++ b/config-2.yml @@ -0,0 +1,75 @@ +#Mistral-7b-Instruct-v0.2 +base_model: mistralai/Mistral-7B-Instruct-v0.2 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer +is_mistral_derived_model: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: menna/tutor-synthetic-v2 + type: completion + field: text +dataset_prepared_path: last_run_prepared +hub_model_id: menna/mistral-synthetic-tutor-v2 +val_set_size: 0.05 +output_dir: ./out + +adapter: lora +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj + +sequence_len: 512 +sample_packing: false +pad_to_sequence_len: true + +wandb_project: mistral-synthetic-tutor-v2 +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: checkpoint + +#only 2 epochs because of small dataset +gradient_accumulation_steps: 3 +micro_batch_size: 2 +num_epochs: 3 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +chat_template: inst + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_table_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97d5277f8f9f2e1b58ba0cfba371c1e17b2255e1 --- /dev/null +++ b/config.yaml @@ -0,0 +1,77 @@ +#Mistral-7b-Instruct-v0.2 +base_model: mistralai/Mistral-7B-Instruct-v0.2 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer +is_mistral_derived_model: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: menna/tutor-synthetic + type: completion + field: text +dataset_prepared_path: last_run_prepared +hub_model_id: menna/mistral-synthetic-tutor +val_set_size: 0.05 +output_dir: ./out + +adapter: lora +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj + +sequence_len: 512 +sample_packing: false +pad_to_sequence_len: true + +wandb_project: mistral-synthetic-tutor +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: checkpoint + +#only 2 epochs because of small dataset +gradient_accumulation_steps: 3 
+micro_batch_size: 2 +num_epochs: 3 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +chat_template: inst + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_table_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "> ~/.bashrc && \ + printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \ + chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \ + chmod +x /root/cloud-entrypoint.sh + +ENTRYPOINT ["/root/cloud-entrypoint.sh"] +CMD ["sleep", "infinity"] diff --git a/docker/Dockerfile-tests b/docker/Dockerfile-tests new file mode 100644 index 0000000000000000000000000000000000000000..d5c0595a8771424ceb1e3cab88bd74314a1e3abe --- /dev/null +++ b/docker/Dockerfile-tests @@ -0,0 +1,41 @@ +ARG BASE_TAG=main-base +FROM winglian/axolotl-base:$BASE_TAG + +ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" +ARG AXOLOTL_EXTRAS="" +ARG AXOLOTL_ARGS="" +ARG CUDA="118" +ENV BNB_CUDA_VERSION=$CUDA +ARG PYTORCH_VERSION="2.1.2" +ARG GITHUB_REF="main" + +ENV PYTORCH_VERSION=$PYTORCH_VERSION + +RUN apt-get update && \ + apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev + +WORKDIR /workspace + +RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git + +WORKDIR /workspace/axolotl + +RUN git fetch origin +$GITHUB_REF && \ + git checkout FETCH_HEAD + +# If AXOLOTL_EXTRAS is set, append it in brackets +RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ + pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ + else \ + pip install -e .[deepspeed,flash-attn,mamba-ssm] $AXOLOTL_ARGS; \ + fi + +# So we can test the Docker image +RUN pip install pytest + +# fix so that git fetch/pull from remote works +RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \ + git config --get remote.origin.fetch + +# helper for huggingface-login cli +RUN git config --global credential.helper store diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..4c23a061fa46d2db78ad1dc344484d953bd1a711 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +/.quarto/ +_site/ diff --git a/docs/config.qmd b/docs/config.qmd new file mode 100644 index 0000000000000000000000000000000000000000..d93b170e7bdb97503d0807ebdea0163ce7838e17 --- /dev/null +++ b/docs/config.qmd @@ -0,0 +1,17 @@ +--- +title: Config options +description: A complete list of all configuration options. +--- + +```{python} +#|echo: false +#|output: asis +import re +# Regex pattern to match the YAML block including its code fence +pattern = r']*id="all-yaml-options"[^>]*>.*?All yaml options.*?```yaml(.*?)```.*?' 
+ +with open('../README.md', 'r') as f: + doc = f.read() +match = re.search(pattern, doc, re.DOTALL) +print("```yaml", match.group(1).strip(), "```", sep="\n") +``` diff --git a/docs/debugging.qmd b/docs/debugging.qmd new file mode 100644 index 0000000000000000000000000000000000000000..7237fbd6f22f592f0ff58d9941685926817fe738 --- /dev/null +++ b/docs/debugging.qmd @@ -0,0 +1,245 @@ +--- +title: Debugging +description: How to debug Axolotl +--- + + +This document provides some tips and tricks for debugging Axolotl. It also provides an example configuration for debugging with VSCode. A good debugging setup is essential to understanding how Axolotl code works behind the scenes. + +## Table of Contents + +- [General Tips](#general-tips) +- [Debugging with VSCode](#debugging-with-vscode) + - [Background](#background) + - [Configuration](#configuration) + - [Customizing your debugger](#customizing-your-debugger) + - [Video Tutorial](#video-tutorial) +- [Debugging With Docker](#debugging-with-docker) + - [Setup](#setup) + - [Attach To Container](#attach-to-container) + - [Video - Attaching To Docker On Remote Host](#video---attaching-to-docker-on-remote-host) + +## General Tips + +While debugging it's helpful to simplify your test scenario as much as possible. Here are some tips for doing so: + +> [!Important] +> All of these tips are incorporated into the [example configuration](#configuration) for debugging with VSCode below. + +1. **Make sure you are using the latest version of axolotl**: This project changes often and bugs get fixed fast. Check your git branch and make sure you have pulled the latest changes from `main`. +1. **Eliminate concurrency**: Restrict the number of processes to 1 for both training and data preprocessing: + - Set `CUDA_VISIBLE_DEVICES` to a single GPU, ex: `export CUDA_VISIBLE_DEVICES=0`. + - Set `dataset_processes: 1` in your axolotl config or run the training command with `--dataset_processes=1`. +2. **Use a small dataset**: Construct or use a small dataset from HF Hub. When using a small dataset, you will often have to make sure `sample_packing: False` and `eval_sample_packing: False` to avoid errors. If you are in a pinch and don't have time to construct a small dataset but want to use from the HF Hub, you can shard the data (this will still tokenize the entire dataset, but will only use a fraction of the data for training. For example, to shard the dataset into 20 pieces, add the following to your axolotl config): + ```yaml + dataset: + ... + shards: 20 + ``` +3. **Use a small model**: A good example of a small model is [TinyLlama/TinyLlama-1.1B-Chat-v1.0](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0). +4. **Minimize iteration time**: Make sure the training loop finishes as fast as possible, with these settings. + - `micro_batch_size: 1` + - `max_steps: 1` + - `val_set_size: 0` +5. **Clear Caches:** Axolotl caches certain steps and so does the underlying HuggingFace trainer. You may want to clear some of these caches when debugging. + - Data preprocessing: When debugging data preprocessing, which includes prompt template formation, you may want to delete the directory set in `dataset_prepared_path:` in your axolotl config. If you didn't set this value, the default is `last_run_prepared`. + - HF Hub: If you are debugging data preprocessing, you should clear the relevant HF cache [HuggingFace cache](https://huggingface.co/docs/datasets/cache), by deleting the appropriate `~/.cache/huggingface/datasets/...` folder(s). 
+ - **The recommended approach is to redirect all outputs and caches to a temporary folder and delete selected subfolders before each run. This is demonstrated in the example configuration below.** + + +## Debugging with VSCode + +### Background + +The below example shows how to configure VSCode to debug data preprocessing of the `sharegpt` format. This is the format used when you have the following in your axolotl config: + +```yaml +datasets: + - path: # example on HF Hub: philschmid/guanaco-sharegpt-style + type: sharegpt +``` + +>[!Important] +> If you are already familiar with advanced VSCode debugging, you can skip the below explanation and look at the files [.vscode/launch.json](../.vscode/launch.json) and [.vscode/tasks.json](../.vscode/tasks.json) for an example configuration. + +>[!Tip] +> If you prefer to watch a video, rather than read, you can skip to the [video tutorial](#video-tutorial) below (but doing both is recommended). + +### Setup + +Make sure you have an [editable install](https://setuptools.pypa.io/en/latest/userguide/development_mode.html) of Axolotl, which ensures that changes you make to the code are reflected at runtime. Run the following commands from the root of this project: + +```bash +pip3 install packaging +pip3 install -e '.[flash-attn,deepspeed]' +``` + +#### Remote Hosts + +If you developing on a remote host, you can easily use VSCode to debug remotely. To do so, you will need to follow this [remote - SSH guide](https://code.visualstudio.com/docs/remote/ssh). You can also see the video below on [Docker and Remote SSH debugging](#video---attaching-to-docker-on-remote-host). + + +### Configuration + +The easiest way to get started is to modify the [.vscode/launch.json](../.vscode/launch.json) file in this project. This is just an example configuration, so you may need to modify or copy it to suit your needs. + +For example, to mimic the command `cd devtools && CUDA_VISIBLE_DEVICES=0 accelerate launch -m axolotl.cli.train dev_sharegpt.yml`, you would use the below configuration[^1]. Note that we add additional flags that override the axolotl config and incorporate the tips above (see the comments). We also set the working directory to `devtools` and set the `env` variable `HF_HOME` to a temporary folder that is later partially deleted. This is because we want to delete the HF dataset cache before each run in order to ensure that the data preprocessing code is run from scratch. + +```jsonc +// .vscode/launch.json +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug axolotl prompt - sharegpt", + "type": "python", + "module": "accelerate.commands.launch", + "request": "launch", + "args": [ + "-m", "axolotl.cli.train", "dev_sharegpt.yml", + // The flags below simplify debugging by overriding the axolotl config + // with the debugging tips above. Modify as needed. 
+ "--dataset_processes=1", // limits data preprocessing to one process + "--max_steps=1", // limits training to just one step + "--batch_size=1", // minimizes batch size + "--micro_batch_size=1", // minimizes batch size + "--val_set_size=0", // disables validation + "--sample_packing=False", // disables sample packing which is necessary for small datasets + "--eval_sample_packing=False",// disables sample packing on eval set + "--dataset_prepared_path=temp_debug/axolotl_outputs/data", // send data outputs to a temp folder + "--output_dir=temp_debug/axolotl_outputs/model" // send model outputs to a temp folder + ], + "console": "integratedTerminal", // show output in the integrated terminal + "cwd": "${workspaceFolder}/devtools", // set working directory to devtools from the root of the project + "justMyCode": true, // step through only axolotl code + "env": {"CUDA_VISIBLE_DEVICES": "0", // Since we aren't doing distributed training, we need to limit to one GPU + "HF_HOME": "${workspaceFolder}/devtools/temp_debug/.hf-cache"}, // send HF cache to a temp folder + "preLaunchTask": "cleanup-for-dataprep", // delete temp folders (see below) + } + ] +} +``` + +**Additional notes about this configuration:** + +- The argument `justMyCode` is set to `true` such that you step through only the axolotl code. If you want to step into dependencies, set this to `false`. +- The `preLaunchTask`: `cleanup-for-dataprep` is defined in [.vscode/tasks.json](../.vscode/tasks.json) and is used to delete the following folders before debugging, which is essential to ensure that the data pre-processing code is run from scratch: + - `./devtools/temp_debug/axolotl_outputs` + - `./devtools/temp_debug/.hf-cache/datasets` + +>[!Tip] +> You may not want to delete these folders. For example, if you are debugging model training instead of data pre-processing, you may NOT want to delete the cache or output folders. You may also need to add additional tasks to the `tasks.json` file depending on your use case. + +Below is the [./vscode/tasks.json](../.vscode/tasks.json) file that defines the `cleanup-for-dataprep` task. This task is run before each debugging session when you use the above configuration. Note how there are two tasks that delete the two folders mentioned above. The third task `cleanup-for-dataprep` is a composite task that combines the two tasks. A composite task is necessary because VSCode does not allow you to specify multiple tasks in the `preLaunchTask` argument of the `launch.json` file. + +```jsonc +// .vscode/tasks.json +// this file is used by launch.json +{ + "version": "2.0.0", + "tasks": [ + // this task changes into the devtools directory and deletes the temp_debug/axolotl_outputs folder + { + "label": "delete-outputs", + "type": "shell", + "command": "rm -rf temp_debug/axolotl_outputs", + "options":{ "cwd": "${workspaceFolder}/devtools"}, + "problemMatcher": [] + }, + // this task changes into the devtools directory and deletes the `temp_debug/.hf-cache/datasets` folder + { + "label": "delete-temp-hf-dataset-cache", + "type": "shell", + "command": "rm -rf temp_debug/.hf-cache/datasets", + "options":{ "cwd": "${workspaceFolder}/devtools"}, + "problemMatcher": [] + }, + // this task combines the two tasks above + { + "label": "cleanup-for-dataprep", + "dependsOn": ["delete-outputs", "delete-temp-hf-dataset-cache"], + } + ] +} +``` + +### Customizing your debugger + +Your debugging use case may differ from the example above. 
The easiest thing to do is to put your own axolotl config in the `devtools` folder and modify the `launch.json` file to use your config. You may also want to modify the `preLaunchTask` to delete different folders or not delete anything at all. + +### Video Tutorial + +The following video tutorial walks through the above configuration and demonstrates how to debug with VSCode, (click the image below to watch): + + +
+ +## Debugging With Docker + +Using [official Axolotl Docker images](https://hub.docker.com/r/winglian/axolotl/tags) is a great way to debug your code, and is a very popular way to use Axolotl. Attaching VSCode to Docker takes a few more steps. + +### Setup + +On the host that is running axolotl (ex: if you are using a remote host), clone the axolotl repo and change your current directory to the root: + +```bash +git clone https://github.com/OpenAccess-AI-Collective/axolotl +cd axolotl +``` + +>[!Tip] +> If you already have axolotl cloned on your host, make sure you have the latest changes and change into the root of the project. + +Next, run the desired docker image and mount the current directory. Below is a docker command you can run to do this:[^2] + +```bash +docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --mount type=bind,src="${PWD}",target=/workspace/axolotl -v ${HOME}/.cache/huggingface:/root/.cache/huggingface winglian/axolotl:main-py3.10-cu118-2.0.1 +``` + +>[!Tip] +> To understand which containers are available, see the [Docker section of the README](../README.md#docker) and the [DockerHub repo](https://hub.docker.com/r/winglian/axolotl/tags). For details of how the Docker containers are built, see axolotl's [Docker CI builds](../.github/workflows/main.yml). + +You will now be in the container. Next, perform an editable install of Axolotl: + +```bash +pip3 install packaging +pip3 install -e '.[flash-attn,deepspeed]' +``` + +### Attach To Container + +Next, if you are using a remote host, [Remote into this host with VSCode](https://code.visualstudio.com/docs/remote/ssh). If you are using a local host, you can skip this step. + +Next, select `Dev Containers: Attach to Running Container...` using the command palette (`CMD + SHIFT + P`) in VSCode. You will be prompted to select a container to attach to. Select the container you just created. You will now be in the container with a working directory that is at the root of the project. Any changes you make to the code will be reflected both in the container and on the host. + +Now you are ready to debug as described above (see [Debugging with VSCode](#debugging-with-vscode)). + +### Video - Attaching To Docker On Remote Host + +Here is a short video that demonstrates how to attach to a Docker container on a remote host: + + +
+ +[^1]: The config actually mimics the command `CUDA_VISIBLE_DEVICES=0 python -m accelerate.commands.launch -m axolotl.cli.train devtools/sharegpt.yml`, but this is the same thing. + +[^2]: Many of the below flags are recommended best practices by Nvidia when using nvidia-container-toolkit. You can read more about these flags [here](https://docs.nvidia.com/deeplearning/frameworks/user-guide/index.html). diff --git a/docs/faq.qmd b/docs/faq.qmd new file mode 100644 index 0000000000000000000000000000000000000000..91413d24e9488385454e78b2562c76fc9fe80f3c --- /dev/null +++ b/docs/faq.qmd @@ -0,0 +1,21 @@ +--- +title: FAQ +description: Frequently asked questions +--- + + +**Q: The trainer stopped and hasn't progressed in several minutes.** + +> A: Usually an issue with the GPUs communicating with each other. See the [NCCL doc](nccl.qmd) + +**Q: Exitcode -9** + +> A: This usually happens when you run out of system RAM. + +**Q: Exitcode -7 while using deepspeed** + +> A: Try upgrading deepspeed w: `pip install -U deepspeed` + +**Q: AttributeError: 'DummyOptim' object has no attribute 'step'** + +> A: You may be using deepspeed with single gpu. Please don't set `deepspeed:` in yaml or cli. diff --git a/docs/fsdp_qlora.qmd b/docs/fsdp_qlora.qmd new file mode 100644 index 0000000000000000000000000000000000000000..69b4ad4454fe60f5f0717d7ca089669d0840099a --- /dev/null +++ b/docs/fsdp_qlora.qmd @@ -0,0 +1,43 @@ +--- +title: FDSP + QLoRA +description: Use FSDP with QLoRA to fine-tune large LLMs on consumer GPUs. +format: + html: + toc: true +--- + +## Background + +Using FSDP with QLoRA is essential for **fine-tuning larger (70b+ parameter) LLMs on consumer GPUs.** For example, you can use FSDP + QLoRA to train a 70b model on two 24GB GPUs[^1]. + +Below, we describe how to use this feature in Axolotl. + +## Usage + +To enable `QLoRA` with `FSDP`, you need to perform the following steps: + +> ![Tip] +> See the [example config](#example-config) file in addition to reading these instructions. + +1. Set `adapter: qlora` in your axolotl config file. +2. Enable FSDP in your axolotl config, as [described here](https://github.com/OpenAccess-AI-Collective/axolotl?tab=readme-ov-file#fsdp). +3. Use one of the supported model types: `llama`, `mistral` or `mixtral`. + +## Example Config + +[examples/llama-2/qlora-fsdp.yml](../examples/llama-2/qlora-fsdp.yml) contains an example of how to enable QLoRA + FSDP in axolotl. + +## References + +- [PR #1378](https://github.com/OpenAccess-AI-Collective/axolotl/pull/1378) enabling QLoRA in FSDP in Axolotl. +- [Blog Post](https://www.answer.ai/posts/2024-03-06-fsdp-qlora.html) from the [Answer.AI](https://www.answer.ai/) team describing the work that enabled QLoRA in FSDP. +- Related HuggingFace PRs Enabling FDSP + QLoRA: + - Accelerate [PR#2544](https://github.com/huggingface/accelerate/pull/2544 ) + - Transformers [PR#29587](https://github.com/huggingface/transformers/pull/29587) + - TRL [PR#1416](https://github.com/huggingface/trl/pull/1416) + - PEFT [PR#1550](https://github.com/huggingface/peft/pull/1550) + + + + +[^1]: This was enabled by [this work](https://www.answer.ai/posts/2024-03-06-fsdp-qlora.html) from the Answer.AI team. 
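For orientation, the pieces fit together in only a few keys. The block below is a sketch, not a copy of the linked example config: it reuses the FSDP keys shown elsewhere in these docs and assumes a hypothetical 70b Llama base model. `examples/llama-2/qlora-fsdp.yml` remains the authoritative reference.

```yaml
# Sketch only - see examples/llama-2/qlora-fsdp.yml for the full, tested config.
base_model: meta-llama/Llama-2-70b-hf   # assumption: any supported llama/mistral/mixtral model works here
load_in_4bit: true
adapter: qlora

fsdp:
  - full_shard
  - auto_wrap
fsdp_config:
  fsdp_offload_params: true
  fsdp_state_dict_type: FULL_STATE_DICT
  fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
```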
diff --git a/docs/images/4d-mask.png b/docs/images/4d-mask.png new file mode 100644 index 0000000000000000000000000000000000000000..181e693cf95907dea33f5c337c6652e7b28bfbc1 Binary files /dev/null and b/docs/images/4d-mask.png differ diff --git a/docs/input_output.qmd b/docs/input_output.qmd new file mode 100644 index 0000000000000000000000000000000000000000..4e2ea1345f96d1d340678c193bf61ebcccae5e5b --- /dev/null +++ b/docs/input_output.qmd @@ -0,0 +1,263 @@ +--- +title: Template-free prompt construction +description: "Template-free prompt construction with the `input_output` format" +--- + + + +- [Background](#background) + - [Masking Inputs](#masking-inputs) + - [You may not want prompt templates](#you-may-not-want-prompt-templates) + - [The `input_output` format](#the-input_output-format) +- [Usage](#usage) + - [1. Prepare Data](#1-prepare-data) + - [2. Use `type: input_output`](#2-use-type-input_output) + - [3. Check the prompts](#3-check-the-prompts) + + + + + +## Background + + + +### Masking Inputs + +One of the most popular features of +[axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) is +setting the following configuration value: + + +```yaml +train_on_inputs: false +``` + +If you declare a [dataset formats](https://github.com/OpenAccess-AI-Collective/axolotl?tab=readme-ov-file#dataset) +such as `alpaca` or `chatml`, axolotl knows what is an input +(i.e. human) vs. an output (i.e. the assistant) and masks the input +labels so that your model can focus on predicting the outputs only. + + + +### You may not want prompt templates + +However, there are many situations where you don't want to use one of +these formats or templates (I usually don't!). This is because they can: + +- Add unnecessary boilerplate to your prompts. +- Create artifacts like special delimiters `<|im_start|>` that can + quickly become footguns if you don't include them correctly at + inference time. +- Enforce a *chat* interface when you do not want one. Sometimes you + just want to fine-tune a model to a very specific task and do NOT + want multi-turn conversations, roles, etc. +- Limit you to only certain roles that the template allows. + + + +### The `input_output` format + +You can construct your prompts without a template by using the +`input_output` format, by setting `type: input_output` in your +configuration file like this: + +**config.yml** + +```yaml +train_on_inputs: false # Mask segments of your data +datasets: + - path: output.jsonl + type: input_output # use template free prompt construction +``` + +Unlike `type: completion`, which is also template-free, +`type: input_output` allows you to mask segments of your text. More +details on how this works are described below. + + + +## Usage + +This is how you can use the `input_output` format: + + + +### 1. Prepare Data + +To use the `input_output` format, collect your data in the following +format into a jsonl file (below is the first row from the file +`output`.jsonl` pretty printed): + +```bash +$ head -n1 output.jsonl | python -m json.tool + +{.cell-output .cell-output-stdout} + { + "segments": [ + { + "label": true, + "text": "Hello\n" + }, + { + "label": true, + "text": "hi there!. " + }, + { + "label": false, + "text": "goodbye " + }, + { + "label": true, + "text": "farewell" + } + ] + } +``` + +Set `label:false` when you want to mask a segment of text so that the +model isn't trained on it. Some things to keep in mind: + +> [!IMPORTANT] +> 1. **EOS, BOS, spaces, newlines etc. are entirely up to you. 
Axolotl + concatenates all the segments as-is.** The tokenizer doesn't add + anything additional. Notice how I added spaces, newlines, `` + (BOS), and `` (EOS) myself. +> 2. Make sure you check the materialized output to validate that the + prompt is getting assembled how you like. + + + +### 2. Use `type: input_output` + +Let's materialize data with our `output.jsonl` file by setting +`type: input_output` in our axolotl config: + +```yaml +# training_config.yaml +base_model: mistralai/Mistral-7B-v0.1 +data_seed: 49 +seed: 49 + +datasets: + - path: output.jsonl + type: input_output +val_set_size: 0.1 + +sequence_len: 896 +sample_packing: false + +micro_batch_size: 2 +gradient_accumulation_steps: 3 +eval_batch_size: 2 +num_epochs: 1 +learning_rate: 0.0002 + +train_on_inputs: false +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" +``` + +You can use the following command to materialize your data. The +`--debug` flag will print the tokens, along with the labels so you can +verify that the correct items are being ignored: + +```bash +$ python -m axolotl.cli.preprocess training_config.yaml --debug + +... +[2024-03-05 23:36:46,969] [INFO] [axolotl.check_example_labels:35] [PID:607731] [RANK:0] (1, 1) Hello(22557, 22557) +(13, 13) hi(12014, 12014) there(736, 736) !(28808, 28808) .(28723, 28723) (28705, 28705) good(-100, 1179) bye(-100, 17664) (-100, 28705) fare(19111, 19111) well(5458, 5458) (2, 2) + +``` + +The format is `decoded_token`(`label`, `token_id`), for example, +`(1, 1)` means that the token is ``, the label is `1` and the +token_id is `1`. When the label is `-100` then that token is ignored for +training. + + + +### 3. Check the prompts + +Here is another way to check the materialized output: + +```python +from transformers import AutoTokenizer +from datasets import load_from_disk +import yaml + +directory = !ls last_run_prepared/ +with open('training_config.yaml', 'r') as f: + cfg = yaml.safe_load(f) +model_id = cfg['base_model'] +tok = AutoTokenizer.from_pretrained(model_id) +ds = load_from_disk(f'last_run_prepared/{directory[0]}/') +``` + +```python +>>> row = ds[0] +>>> print(tok.decode(row['input_ids'])) + Hello + hi there!. goodbye farewell +``` + +We can check that the right tokens are ingored by comparing the labels +to each token: + +```python +import pandas as pd +pd.DataFrame([{'token': tok.decode(i), 'label': l, 'id':i} for i,l in + zip(row['input_ids'], row['labels'])]) +``` + +| token | label | id | +|-------|-------|-------| +| 0 | \ | 1 | +| 1 | Hello | 22557 | +| 2 | \\n | 13 | +| 3 | hi | 12014 | +| 4 | there | 736 | +| 5 | ! | 28808 | +| 6 | . | 28723 | +| 7 | | 28705 | +| 8 | good | -100 | +| 9 | bye | -100 | +| 10 | | -100 | +| 11 | fare | 19111 | +| 12 | well | 5458 | +| 13 | \| 2 | + + + +If we look at the input data, the above table seems correct! (The jsonl +version is repeated below for reference): + + +```bash +$ head -n1 output.jsonl | python -m json.tool + +{.cell-output .cell-output-stdout} + { + "segments": [ + { + "label": true, + "text": "Hello\n" + }, + { + "label": true, + "text": "hi there!. 
" + }, + { + "label": false, + "text": "goodbye " + }, + { + "label": true, + "text": "farewell" + } + ] + } +``` diff --git a/docs/mac.qmd b/docs/mac.qmd new file mode 100644 index 0000000000000000000000000000000000000000..2a83035381d8534b53917d0981d7269addec4f70 --- /dev/null +++ b/docs/mac.qmd @@ -0,0 +1,22 @@ +--- +title: Mac M-series +description: Mac M-series support +--- + +Currently Axolotl on Mac is partially usable, many of the dependencies of Axolotl including Pytorch do not support MPS or have incomplete support. + +Current support: + +- [x] Support for all models +- [x] Full training of models +- [x] LoRA training +- [x] Sample packing +- [ ] FP16 and BF16 (awaiting AMP support for MPS in Pytorch) +- [ ] Tri-dao's flash-attn (until it is supported use spd_attention as an alternative) +- [ ] xformers +- [ ] bitsandbytes (meaning no 4/8 bits loading and bnb optimizers) +- [ ] qlora +- [ ] DeepSpeed + +Untested: +- FSDP diff --git a/docs/multi-node.qmd b/docs/multi-node.qmd new file mode 100644 index 0000000000000000000000000000000000000000..5c6fa976b991c5868efeb3daaf3156a43482517b --- /dev/null +++ b/docs/multi-node.qmd @@ -0,0 +1,48 @@ +--- +title: Multi Node +description: How to use Axolotl on multiple machines +--- + +You will need to create a configuration for accelerate, either by using `accelerate config` and follow the instructions or you can use one of the preset below: + +~/.cache/huggingface/accelerate/default_config.yaml +```yaml +compute_environment: LOCAL_MACHINE +debug: false +distributed_type: FSDP +downcast_bf16: 'no' +machine_rank: 0 # Set to 0 for the main machine, increment by one for other machines +main_process_ip: 10.0.0.4 # Set to main machine's IP +main_process_port: 5000 +main_training_function: main +mixed_precision: bf16 +num_machines: 2 # Change to the number of machines +num_processes: 4 # That's the total number of GPUs, (for example: if you have 2 machines with 4 GPU, put 8) +rdzv_backend: static +same_network: true +tpu_env: [] +tpu_use_cluster: false +tpu_use_sudo: false +use_cpu: false +``` + +Configure your model to use FSDP with for example: +```yaml +fsdp: + - full_shard + - auto_wrap +fsdp_config: + fsdp_offload_params: true + fsdp_state_dict_type: FULL_STATE_DICT + fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer +``` + +## Machine configuration + +On each machine you need a copy of Axolotl, we suggest using the same commit to ensure compatibility. + +You will also need to have the same configuration file for your model on each machine. + +On the main machine only, make sure the port you set as `main_process_port` is open in TCP and reachable by other machines. + +All you have to do now is launch using accelerate as you would usually do on each machine and voila, the processes will start once you have launched accelerate on every machine. diff --git a/docs/multipack.qmd b/docs/multipack.qmd new file mode 100644 index 0000000000000000000000000000000000000000..097bcd2e503c2e35f637824002a68682fd52979f --- /dev/null +++ b/docs/multipack.qmd @@ -0,0 +1,76 @@ +--- +title: Multipack (Sample Packing) +description: Multipack is a technique to pack multiple sequences into a single batch to increase training throughput. +--- + +## Visualization of Multipack with Flash Attention + +Because Flash Attention simply drops the attention mask, we do not need to +construct a 4d attention mask. We only need to concatenate the sequences into +a single batch and let flash attention know where each new sequence begins. 
+ + +4k context, bsz =4, +each character represents 256 tokens +X represents a padding token + +``` + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 +[[ A A A A A A A A A A A ] + B B B B B B ] + C C C C C C C ] + D D D D ]] + +[[ E E E E E E E E ] + [ F F F F ] + [ G G G ] + [ H H H H ]] + +[[ I I I ] + [ J J J ] + [ K K K K K] + [ L L L ]] +``` + +after padding to longest input in each step +``` + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 +[[ A A A A A A A A A A A ] + B B B B B B X X X X X X ] + C C C C C C C X X X X ] + D D D D X X X X X X X ]] + +[[ E E E E E E E E ] + [ F F F F X X X X ] + [ G G G X X X X X ] + [ H H H H X X X X ]] + +[[ I I I X X ] + [ J J J X X ] + [ K K K K K ] + [ L L L X X ]] +``` + +w packing ( note it's the same effective number of tokens per step, but a true bsz of 1) +``` + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 +[[ A A A A A A A A A A A B B B B B + B C C C C C C C D D D D E E E E + E E E E F F F F F G G G H H H H + I I I J J J J K K K K K L L L X ]] +``` + +cu_seqlens: +[[ 0, 11, 17, 24, 28, 36, 41 44, 48, 51, 55, 60, 64]] + + +## Multipack without Flash Attention + +Multipack can still be achieved without Flash attention, but with lower packing +efficiency as we are not able to join multiple batches into a single batch due to +context length limits without flash attention. We can use either Pytorch's Scaled +Dot Product Attention implementation or native Pytorch attention implementation +along with [4d attention masks](https://github.com/huggingface/transformers/pull/27539) +to pack sequences together and avoid cross attention. + +axolotl diff --git a/docs/nccl.qmd b/docs/nccl.qmd new file mode 100644 index 0000000000000000000000000000000000000000..3b616aa66526e1d18cc3b292a7261c6470ce6540 --- /dev/null +++ b/docs/nccl.qmd @@ -0,0 +1,49 @@ +--- +title: NCCL +description: Troubleshooting NCCL issues +--- + +NVIDIA NCCL is a library to facilitate and optimize multi-GPU communication operations, such as broadcast, all-gather, reduce, all-reduce, etc. Broadly, NCCL configuration is highly environment-specific and is configured via several [environment variables](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html). A common NCCL-related problem occurs when a long-running operation times out causing the training process to abort: + +```text +Watchdog caught collective operation timeout: WorkNCCL(SeqNum=42, OpType=ALLGATHER, Timeout(ms)=1800000) ran for 1806948 milliseconds before timing out. +``` + +Often, this timeout will happen after 30 minutes (the default setting) and is accompanied by below-average power consumption with near 100% GPU utilization before the error is raised. Nvidia recommends [disabling PCI access control services (ACS)](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/troubleshooting.html#pci-access-control-services-acs) as a possible solution if this is available to you. + +Forcing cross-GPU communication via [NVLink](https://en.wikipedia.org/wiki/NVLink) may help without increasing timeouts. To verify that your configuration is leveraging NVLink run the following command: + +```shell +nvidia-smi nvlink --status +``` + +To force NCCL to use NVLink, simply set this in the environment: + +```shell +export NCCL_P2P_LEVEL=NVL +``` + +If NVLink is not available in your environment there are other options for ``NCCL_P2P_LEVEL`` in the table below: + +| NCCL_P2P_LEVEL | Description | +| -------------- | ----------- | +| PIX | P2P data transfers through no more than a single PCIe bridge. 
Faster data transfer rates vs to paths involving multiple bridges, but slower compared to direct GPU-to-GPU communication. | +| PXB | P2P data transfers through multiple PCIe bridges but not going through the PCIe Host Bridge; this path involves a complex routing process, potentially incurring a moderate level of latency. | +| PHB | P2P data transfers occur over the PCIe and through a PCIe Host Bridge, typically involving the CPU, which can facilitate direct memory access but might introduce additional latency compared to more direct paths (ex PIX, NVL) | + +To validate that acceptable data transfer speeds exist for your training job, running [NCCL Tests](https://github.com/NVIDIA/nccl-tests/blob/master/README.md) can help pinpoint bottlenecks, for example: + +```shell +./build/all_reduce_perf -b 8 -e 128M -f 2 -g 3 +``` + +It can be useful when debugging NCCL communication timeouts to activate additional logging in both PyTorch and NCCL: + +```shell +export NCCL_DEBUG=INFO +export NCCL_DEBUG_SUBSYS=ALL +export TORCH_DISTRIBUTED_DEBUG=INFO +export TORCHELASTIC_ERROR_FILE=/PATH/TO/torcherror.log +``` + +Finally, if you believe your training job needs more time you can increase the timeout past 30 minutes by setting the ``ddp_timeout`` value in the Axolotl configuration. See [PyTorch init_process_group](https://pytorch.org/docs/stable/distributed.html#torch.distributed.init_process_group) for documentation on this value. diff --git a/docs/rlhf.qmd b/docs/rlhf.qmd new file mode 100644 index 0000000000000000000000000000000000000000..7db68915ade99bf4213ce977ee45619df39a6885 --- /dev/null +++ b/docs/rlhf.qmd @@ -0,0 +1,72 @@ +--- +title: "RLHF (Beta)" +description: "Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human feedback." +--- + +### Overview + +Reinforcement Learning from Human Feedback is a method whereby a language model is optimized from data using human +feedback. Various methods include, but not limited to: + +- Proximal Policy Optimization (PPO) (not yet supported in axolotl) +- Direct Preference Optimization (DPO) +- Identity Preference Optimization (IPO) + + +### RLHF using Axolotl + +>[!IMPORTANT] +>This is a BETA feature and many features are not fully implemented. You are encouraged to open new PRs to improve the integration and functionality. + +The various RL training methods are implemented in trl and wrapped via axolotl. Below are various examples with how you can use various preference datasets to train models that use ChatML + +#### DPO +```yaml +rl: dpo +datasets: + - path: Intel/orca_dpo_pairs + split: train + type: chatml.intel + - path: argilla/ultrafeedback-binarized-preferences + split: train + type: chatml.argilla +``` + +#### IPO +```yaml +rl: ipo +``` + +#### ORPO + +Paper: https://arxiv.org/abs/2403.07691 + +```yaml +rl: orpo +orpo_alpha: 0.1 +remove_unused_columns: false + +chat_template: chatml +datasets: + - path: argilla/ultrafeedback-binarized-preferences-cleaned + type: orpo.chat_template +``` + +#### Using local dataset files +```yaml +datasets: + - ds_type: json + data_files: + - orca_rlhf.jsonl + split: train + type: chatml.intel +``` + +#### Trl autounwrap for peft + +Trl supports autounwrapping peft models, so that a ref model does not need to be additionally loaded, leading to less VRAM needed. This is on by default. To turn it off, pass the following config. + +```yaml +# load ref model when adapter training. 
+rl_adapter_ref_model: true +``` diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml new file mode 100644 index 0000000000000000000000000000000000000000..18dd86e6b432ebc497a44cb3cfd1b3088a8970ec --- /dev/null +++ b/examples/cerebras/btlm-ft.yml @@ -0,0 +1,89 @@ +base_model: cerebras/btlm-3b-8k-base +model_type: AutoModelForCausalLM +tokenizer_type: GPT2Tokenizer +trust_remote_code: true +tokenizer_use_fast: true +tokenizer_legacy: true + +load_in_8bit: false +load_in_4bit: false +strict: false +push_dataset_to_hub: +hf_use_auth_token: true +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_prepared_run +val_set_size: 0.05 + +adapter: +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +sample_packing: false +sample_packing_eff_est: +sample_packing_seq_len_multiplier: +total_num_tokens: + +lora_r: +lora_alpha: +lora_dropout: +lora_target_modules: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +output_dir: btlm-out +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 1 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_eps: 0.000000001 +max_grad_norm: 1.0 + +torchdistx_path: +lr_scheduler: cosine +lr_quadratic_warmup: true +learning_rate: 0.000085 +train_on_inputs: true +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 + +xformers_attention: +flash_attention: true +sdp_attention: +flash_optimum: + +gptq_groupsize: +gptq_model_v1: + +warmup_steps: 32 +evals_per_epoch: 4 +saves_per_epoch: 1 +save_total_limit: + +debug: +deepspeed: +weight_decay: 0.1 +special_tokens: + pad_token: "<|endoftext|>" +fsdp: +# - full_shard +# - auto_wrap +fsdp_config: +# fsdp_state_dict_type: FULL_STATE_DICT +# fsdp_transformer_layer_cls_to_wrap: BTLMBlock diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..c4f44326c2bebeb84d086447d7cdc1715375e36b --- /dev/null +++ b/examples/cerebras/qlora.yml @@ -0,0 +1,59 @@ +base_model: cerebras/Cerebras-GPT-1.3B +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +lora_r: 16 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: + - c_fc + - c_attn + - c_proj +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./qlora-out +batch_size: 4 +micro_batch_size: 4 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..ce5a892d08d4df962070bdb07533c08a2444f5ac --- /dev/null +++ b/examples/code-llama/13b/lora.yml @@ -0,0 +1,67 @@ +base_model: 
codellama/CodeLlama-13b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +s2_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..d822e6847068b2400c65409d41d29d667971732f --- /dev/null +++ b/examples/code-llama/13b/qlora.yml @@ -0,0 +1,68 @@ +base_model: codellama/CodeLlama-13b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: paged_adamw_32bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..dfef2538b0ce3d618b7b484990f53cd5d7166bf5 --- /dev/null +++ b/examples/code-llama/34b/lora.yml @@ -0,0 +1,67 @@ +base_model: codellama/CodeLlama-34b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 
0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +s2_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..77f821e1c830787c496deeda07eab22c04d5c806 --- /dev/null +++ b/examples/code-llama/34b/qlora.yml @@ -0,0 +1,68 @@ +base_model: codellama/CodeLlama-34b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: paged_adamw_32bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..3e6c7fe620af2b513f2fbc1372cc19ba0fd907cd --- /dev/null +++ b/examples/code-llama/7b/lora.yml @@ -0,0 +1,67 @@ +base_model: codellama/CodeLlama-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +s2_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..e817b113cc0e63941533b79630d4fde60369b942 --- /dev/null +++ b/examples/code-llama/7b/qlora.yml @@ -0,0 
+1,68 @@ +base_model: codellama/CodeLlama-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: CodeLlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: paged_adamw_32bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/code-llama/README.md b/examples/code-llama/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5011e347283ab67b2f276de85e7fba4344bf09f --- /dev/null +++ b/examples/code-llama/README.md @@ -0,0 +1,22 @@ +# Overview + +This is an example of a Code Llama configuration for the 7b, 13b and 34b variants. + +The 7b variant fits on any 24GB VRAM GPU and will take up about 17 GB of VRAM during training if using qlora and 20 GB if using lora. On an RTX 4090 it trains 3 epochs of the default dataset in about 15 minutes. + +The 13b variant will fit if you change these settings to the following values: +gradient_accumulation_steps: 2 +micro_batch_size: 1 + +The 34b variant does not fit in 24GB of VRAM - you will need a GPU with 40+ GB of VRAM that also supports FlashAttention v2; an A6000 or A100 is a good choice.
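+
+For the 13b variant, here is a minimal sketch of the memory-saving overrides described above (apply them on top of the 13b lora.yml or qlora.yml; everything else stays unchanged):
+
+```yaml
+gradient_accumulation_steps: 2  # accumulate over more steps to keep the effective batch size
+micro_batch_size: 1             # fewer sequences per device step lowers peak VRAM
+```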
+ +```shell +accelerate launch scripts/finetune.py examples/code-llama/[MODEL_SIZE]/qlora.yml + +``` +or + +```shell +accelerate launch scripts/finetune.py examples/code-llama/[MODEL_SIZE]/lora.yml + +``` diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..9adbe000476592250808b229a3df3eb32ef4e630 --- /dev/null +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "AKjdG7tbTb-n" + }, + "source": [ + "# Example notebook for running Axolotl on google colab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RcbNpOgWRcii" + }, + "outputs": [], + "source": [ + "import torch\n", + "# Check so there is a gpu available, a T4(free tier) is enough to run this notebook\n", + "assert (torch.cuda.is_available()==True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h3nLav8oTRA5" + }, + "source": [ + "## Install Axolotl and dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3c3yGAwnOIdi", + "outputId": "e3777b5a-40ef-424f-e181-62dfecd1dd01" + }, + "outputs": [], + "source": [ + "!pip install torch==\"2.1.2\"\n", + "!pip install -e git+https://github.com/OpenAccess-AI-Collective/axolotl#egg=axolotl\n", + "!pip install flash-attn==\"2.5.0\"\n", + "!pip install deepspeed==\"0.13.1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BW2MFr7HTjub" + }, + "source": [ + "## Create an yaml config file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9pkF2dSoQEUN" + }, + "outputs": [], + "source": [ + "import yaml\n", + "\n", + "# Your YAML string\n", + "yaml_string = \"\"\"\n", + "base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T\n", + "model_type: LlamaForCausalLM\n", + "tokenizer_type: LlamaTokenizer\n", + "is_llama_derived_model: true\n", + "\n", + "load_in_8bit: false\n", + "load_in_4bit: true\n", + "strict: false\n", + "\n", + "datasets:\n", + " - path: mhenrichsen/alpaca_2k_test\n", + " type: alpaca\n", + "dataset_prepared_path:\n", + "val_set_size: 0.05\n", + "output_dir: ./qlora-out\n", + "\n", + "adapter: qlora\n", + "lora_model_dir:\n", + "\n", + "sequence_len: 1096\n", + "sample_packing: true\n", + "pad_to_sequence_len: true\n", + "\n", + "lora_r: 32\n", + "lora_alpha: 16\n", + "lora_dropout: 0.05\n", + "lora_target_modules:\n", + "lora_target_linear: true\n", + "lora_fan_in_fan_out:\n", + "\n", + "wandb_project:\n", + "wandb_entity:\n", + "wandb_watch:\n", + "wandb_name:\n", + "wandb_log_model:\n", + "\n", + "mlflow_experiment_name: colab-example\n", + "\n", + "gradient_accumulation_steps: 1\n", + "micro_batch_size: 1\n", + "num_epochs: 4\n", + "max_steps: 20\n", + "optimizer: paged_adamw_32bit\n", + "lr_scheduler: cosine\n", + "learning_rate: 0.0002\n", + "\n", + "train_on_inputs: false\n", + "group_by_length: false\n", + "bf16: false\n", + "fp16: true\n", + "tf32: false\n", + "\n", + "gradient_checkpointing: true\n", + "early_stopping_patience:\n", + "resume_from_checkpoint:\n", + "local_rank:\n", + "logging_steps: 1\n", + "xformers_attention:\n", + "flash_attention: false\n", + "\n", + "warmup_steps: 10\n", + "evals_per_epoch:\n", + "saves_per_epoch:\n", + "debug:\n", + "deepspeed:\n", + "weight_decay: 0.0\n", + "fsdp:\n", + 
"fsdp_config:\n", + "special_tokens:\n", + "\n", + "\"\"\"\n", + "\n", + "# Convert the YAML string to a Python dictionary\n", + "yaml_dict = yaml.safe_load(yaml_string)\n", + "\n", + "# Specify your file path\n", + "file_path = 'test_axolotl.yaml'\n", + "\n", + "# Write the YAML file\n", + "with open(file_path, 'w') as file:\n", + " yaml.dump(yaml_dict, file)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bidoj8YLTusD" + }, + "source": [ + "## Launch the training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ydTI2Jk2RStU", + "outputId": "d6d0df17-4b53-439c-c802-22c0456d301b" + }, + "outputs": [], + "source": [ + "# Buy using the ! the comand will be executed as a bash command\n", + "!accelerate launch -m axolotl.cli.train /content/test_axolotl.yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Play with inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Buy using the ! the comand will be executed as a bash command\n", + "!accelerate launch -m axolotl.cli.inference /content/test_axolotl.yaml \\\n", + " --qlora_model_dir=\"./qlora-out\" --gradio" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..5be9c6425326a5e651680f410a125303449dd08c --- /dev/null +++ b/examples/falcon/config-7b-lora.yml @@ -0,0 +1,64 @@ +base_model: tiiuae/falcon-7b +trust_remote_code: true +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: true +load_in_4bit: false +gptq: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca:chat +dataset_prepared_path: +val_set_size: 0.05 +adapter: lora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 16 +lora_alpha: 32 +lora_dropout: 0.0 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./falcon-7b +batch_size: 2 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.00003 +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 40 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" + bos_token: "<|endoftext|>" + eos_token: "<|endoftext|>" diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..eb1cdfcdba27a2e993446fc11e6922d1d8786369 --- /dev/null +++ b/examples/falcon/config-7b-qlora.yml @@ -0,0 +1,93 @@ +# 1b: tiiuae/falcon-rw-1b +# 40b: tiiuae/falcon-40b +base_model: tiiuae/falcon-7b +# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main +trust_remote_code: true 
+model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +# enable 4bit for QLoRA +load_in_4bit: true +gptq: false +strict: false +push_dataset_to_hub: +datasets: + - path: QingyiSi/Alpaca-CoT + data_files: + - Chain-of-Thought/formatted_cot_data/gsm8k_train.json + type: "alpaca:chat" +dataset_prepared_path: +val_set_size: 0.05 +# enable QLoRA +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: + +# hyperparameters from QLoRA paper Appendix B.2 +# "We find hyperparameters to be largely robust across datasets" +lora_r: 64 +lora_alpha: 16 +# 0.1 for models up to 13B +# 0.05 for 33B and 65B models +lora_dropout: 0.05 +# add LoRA modules on all linear layers of the base model +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./qlora-out + +# QLoRA paper Table 9 +# - 16 for 7b & 13b +# - 32 for 33b, 64 for 64b +# Max size tested on A6000 +# - 7b: 40 +# - 40b: 4 +# decrease if OOM, increase for max VRAM utilization +micro_batch_size: 1 +gradient_accumulation_steps: 2 +num_epochs: 4 +# Optimizer for QLoRA +optimizer: paged_adamw_32bit +torchdistx_path: +lr_scheduler: cosine +# QLoRA paper Table 9 +# - 2e-4 for 7b & 13b +# - 1e-4 for 33b & 64b +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true +gradient_checkpointing: true +# stop training after this many evaluation losses have increased in a row +# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback +early_stopping_patience: 3 +resume_from_checkpoint: +auto_resume_from_checkpoints: true +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.000001 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" + bos_token: "<|endoftext|>" + eos_token: "<|endoftext|>" diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml new file mode 100644 index 0000000000000000000000000000000000000000..1dd46a93ff217faabcb5be37eeaa6a6489149133 --- /dev/null +++ b/examples/falcon/config-7b.yml @@ -0,0 +1,64 @@ +base_model: tiiuae/falcon-7b +trust_remote_code: true +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: false +gptq: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca:chat +dataset_prepared_path: +val_set_size: 0.05 +adapter: +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 64 +lora_alpha: 32 +lora_dropout: 0.0 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./falcon-7b +batch_size: 2 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.00003 +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 40 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" + bos_token: "<|endoftext|>" + eos_token: 
"<|endoftext|>" diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..262197cb7e37405a1f51183675ae77e81564d825 --- /dev/null +++ b/examples/gemma/qlora.yml @@ -0,0 +1,65 @@ +# use google/gemma-7b if you have access +base_model: mhenrichsen/gemma-7b +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +# huggingface repo +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +val_set_size: 0.1 +output_dir: ./out + +adapter: qlora +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true + +sequence_len: 4096 +sample_packing: false +pad_to_sequence_len: true + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + + +gradient_accumulation_steps: 3 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..cd3f2e2ad78b3d4f75898651126db420443a5547 --- /dev/null +++ b/examples/gptj/qlora.yml @@ -0,0 +1,57 @@ +base_model: EleutherAI/gpt-j-6b +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: qlora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./qlora-out +gradient_accumulation_steps: 2 +micro_batch_size: 2 +num_epochs: 2 +optimizer: paged_adamw_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0001 +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + pad_token: "<|endoftext|>" diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..a672c7b94f46301bb6226d1ca05318681a87cff5 --- /dev/null +++ b/examples/jeopardy-bot/config.yml @@ -0,0 +1,55 @@ +base_model: huggyllama/llama-7b +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: false +datasets: + - path: openaccess-ai-collective/jeopardy + type: jeopardy +dataset_prepared_path: +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 512 +max_packed_sequence_len: +lora_r: +lora_alpha: +lora_dropout: +lora_target_modules: +lora_fan_in_fan_out: false +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./jeopardy-bot-7b +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 4 
+optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.00003 +train_on_inputs: false +group_by_length: false +bf16: auto +tf32: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 5 +xformers_attention: true +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/llama-2/README.md b/examples/llama-2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2ddd711e29a34b72f34efb5745e241ca8e015675 --- /dev/null +++ b/examples/llama-2/README.md @@ -0,0 +1,24 @@ +# Overview + +This is an example of a llama-2 configuration for 7b and 13b. The yaml file contains configuration for the 7b variant, but you can just aswell use the same settings for 13b. + +The 7b variant fits on any 24GB VRAM GPU and will take up about 17 GB of VRAM during training if using qlora and 20 GB if using lora. On a RTX 4090 it trains 3 epochs of the default dataset in about 15 minutes. + +The 13b variant will fit if you change these settings to these values: +gradient_accumulation_steps: 2 +micro_batch_size: 1 + +```shell +accelerate launch -m axolotl.cli.train examples/llama-2/qlora.yml +``` +or + +```shell +accelerate launch -m axolotl.cli.train examples/llama-2/lora.yml +``` + +To launch a full finetuning with 16-bit precision: + +```shell +accelerate launch -m axolotl.cli.train examples/llama-2/fft_optimized.yml +``` diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml new file mode 100644 index 0000000000000000000000000000000000000000..74edc95e6bcee7b8792590e40f68f4bc8d2a6f2c --- /dev/null +++ b/examples/llama-2/fft_optimized.yml @@ -0,0 +1,68 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.05 +output_dir: ./out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: +lora_model_dir: +lora_r: +lora_alpha: +lora_dropout: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +flash_attn_cross_entropy: false +flash_attn_rms_norm: true +flash_attn_fuse_qkv: false +flash_attn_fuse_mlp: true + +warmup_steps: 100 +evals_per_epoch: 4 +eval_table_size: +saves_per_epoch: 1 +debug: +deepspeed: #deepspeed_configs/zero2.json # multi-gpu only +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..68ca9ed31c6c5b76d4e319aee7a7267064219f02 --- /dev/null +++ b/examples/llama-2/gptq-lora.yml @@ -0,0 +1,72 @@ +base_model: TheBloke/Llama-2-7B-GPTQ +gptq: true +gptq_disable_exllama: true +model_type: AutoModelForCausalLM +tokenizer_type: LlamaTokenizer +tokenizer_use_fast: true 
+tokenizer_legacy: true +load_in_8bit: false +load_in_4bit: false +strict: false +push_dataset_to_hub: +hf_use_auth_token: true +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: lora +lora_model_dir: +sequence_len: 4096 +sample_packing: +lora_r: 8 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: + - k_proj + - o_proj + - q_proj + - v_proj +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./model-out +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_eps: 0.00001 +max_grad_norm: 1.0 +torchdistx_path: +lr_scheduler: cosine +lr_quadratic_warmup: true +learning_rate: 0.000017 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: false +float16: true +tf32: true +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: +sdp_attention: +flash_optimum: +warmup_steps: 100 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml new file mode 100644 index 0000000000000000000000000000000000000000..4529a912dc3a14fa319a5bc2c552219551715818 --- /dev/null +++ b/examples/llama-2/loftq.yml @@ -0,0 +1,69 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +peft: + loftq_config: + loftq_bits: 4 + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +s2_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..a7793dce4cbe5fcfa314ad1595db7cc84adcc5b5 --- /dev/null +++ b/examples/llama-2/lora.yml @@ -0,0 +1,66 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 
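+# effective batch size per GPU = micro_batch_size (2) x gradient_accumulation_steps (4) = 8 sequences per optimizer step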
+optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +s2_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml new file mode 100644 index 0000000000000000000000000000000000000000..30916ed45a8e6dc88cf8780d1b2e1a1b0424c3c6 --- /dev/null +++ b/examples/llama-2/qlora-fsdp.yml @@ -0,0 +1,76 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: yahma/alpaca-cleaned + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.05 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 512 +sample_packing: false +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 4 +num_epochs: 4 +optimizer: adamw_torch +lr_scheduler: cosine +learning_rate: 0.00001 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: + - full_shard +fsdp_config: + fsdp_limit_all_gathers: true + fsdp_sync_module_states: true + fsdp_offload_params: true + fsdp_use_orig_params: false + fsdp_cpu_ram_efficient_loading: true + fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer + fsdp_state_dict_type: SHARDED_STATE_DICT +special_tokens: diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..834dbfb33a65dcefc1e8298d74a35bf75a6eafb8 --- /dev/null +++ b/examples/llama-2/qlora.yml @@ -0,0 +1,66 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: paged_adamw_32bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +saves_per_epoch: 1 +debug: 
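+# deepspeed takes a path to a DeepSpeed JSON config (e.g. deepspeed_configs/zero2.json); mainly useful for multi-GPU runs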
+deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml new file mode 100644 index 0000000000000000000000000000000000000000..9fd19953c60190e71cc8326ec52405f26b6b9080 --- /dev/null +++ b/examples/llama-2/relora.yml @@ -0,0 +1,73 @@ +base_model: NousResearch/Llama-2-7b-hf +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./relora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: + +relora_steps: 150 +relora_warmup_steps: 10 +relora_cpu_offload: false + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 4 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..0a5223bcac7dd5cbe505522696e2b38aa3e81be1 --- /dev/null +++ b/examples/mamba/config.yml @@ -0,0 +1,61 @@ +base_model: state-spaces/mamba-2.8b +model_type: MambaLMHeadModel +tokenizer_type: AutoTokenizer +tokenizer_config: EleutherAI/gpt-neox-20b + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.0 +output_dir: ./out + +sequence_len: 2048 +sample_packing: false +pad_to_sequence_len: false + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 1 +num_epochs: 2 +optimizer: paged_adamw_8bit +lr_scheduler: cosine +learning_rate: 5e-5 + +train_on_inputs: false +group_by_length: true + +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: +tokens: +save_safetensors: False diff --git a/examples/mistral/README.md b/examples/mistral/README.md new file mode 100644 index 0000000000000000000000000000000000000000..462c2d3e7bb167d04f4bec5da151dbc66eb8a608 --- /dev/null +++ b/examples/mistral/README.md @@ -0,0 +1,12 @@ +**Mistral 7B** is a language model with a total of 7.3 billion parameters, showcasing a notable performance across a variety of benchmarks. 
+ +Fine Tune: +```shell +accelerate launch -m axolotl.cli.train examples/mistral/config.yml + +``` + +If you run into CUDA OOM, use deepspeed with config zero2.json: +```shell +accelerate launch -m axolotl.cli.train examples/mistral/config.yml --deepspeed deepspeed_configs/zero2.json +``` diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..c909c63e2213b0916a449c554eb4b60943823107 --- /dev/null +++ b/examples/mistral/config.yml @@ -0,0 +1,58 @@ +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./out + +sequence_len: 8192 +sample_packing: true +pad_to_sequence_len: true +eval_sample_packing: false + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.000005 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml new file mode 100644 index 0000000000000000000000000000000000000000..31b0d527e2bb0379256d3153f0ea19e90b59534c --- /dev/null +++ b/examples/mistral/lora-mps.yml @@ -0,0 +1,79 @@ +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0 +output_dir: ./lora-out +eval_sample_packing: false + +adapter: lora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 8 +micro_batch_size: 1 +num_epochs: 2 +optimizer: adamw_torch +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: false +tf32: true + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false +sdp_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_table_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac9ac0dd982ff835144d3fa6f1cd5afb094341e3 --- /dev/null +++ b/examples/mistral/lora.yml @@ -0,0 +1,77 @@ +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + 
+load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./lora-out + +adapter: lora +lora_model_dir: + +sequence_len: 8192 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml new file mode 100644 index 0000000000000000000000000000000000000000..32db7073b7f5632066611681defa50d2e5385a1d --- /dev/null +++ b/examples/mistral/mixtral-qlora-fsdp.yml @@ -0,0 +1,74 @@ +base_model: mistralai/Mixtral-8x7B-v0.1 +model_type: AutoModelForCausalLM +tokenizer_type: LlamaTokenizer +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: tatsu-lab/alpaca + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.02 +output_dir: ./qlora-out + +model_config: + output_router_logits: true + +adapter: qlora +lora_model_dir: + +sequence_len: 1024 +sample_packing: false +pad_to_sequence_len: false + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: paged_adamw_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +weight_decay: 0.0 +fsdp: + - full_shard +fsdp_config: + fsdp_transformer_layer_cls_to_wrap: MixtralSparseMoeBlock +special_tokens: diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ee3da9d65981de3185559cdbbf6daa056c6dba8 --- /dev/null +++ b/examples/mistral/mixtral.yml @@ -0,0 +1,91 @@ +base_model: mistralai/Mixtral-8x7B-v0.1 +model_type: AutoModelForCausalLM +tokenizer_type: LlamaTokenizer +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: tatsu-lab/alpaca + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.0 +output_dir: ./qlora-out + +## You can optionally freeze the entire model and unfreeze a subset of parameters +unfrozen_parameters: +# - ^lm_head.weight$ 
+# - ^model.embed_tokens.weight$[:32000] +# - model.layers.2[0-9]+.block_sparse_moe.gate +# - model.layers.2[0-9]+.block_sparse_moe.experts +# - model.layers.3[0-9]+.block_sparse_moe.gate +# - model.layers.3[0-9]+.block_sparse_moe.experts + +model_config: + output_router_logits: true + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +#lora_target_modules: +# - gate +# - q_proj +# - k_proj +# - v_proj +# - o_proj +# - w1 +# - w2 +# - w3 + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 2 +micro_batch_size: 1 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: deepspeed_configs/zero2.json +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..6fbbb96183d1a3382f663d2bf8c9ba7eb5e0d7eb --- /dev/null +++ b/examples/mistral/qlora.yml @@ -0,0 +1,77 @@ +base_model: mistralai/Mistral-7B-v0.1 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./qlora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 8192 +sample_packing: true +pad_to_sequence_len: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +loss_watchdog_threshold: 5.0 +loss_watchdog_patience: 3 + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/mpt-7b/README.md b/examples/mpt-7b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..12c55688d2aefc559e8acde1a6fc354ce863fe19 --- /dev/null +++ b/examples/mpt-7b/README.md @@ -0,0 +1,6 @@ +# MPT-7B + +```shell +accelerate launch scripts/finetune.py examples/mpt-7b/config.yml + +``` diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..45e31266f1a73057c2f4cc1aabe7b01749a8bc5e --- /dev/null +++ b/examples/mpt-7b/config.yml @@ -0,0 +1,58 @@ +base_model: mosaicml/mpt-7b +tokenizer_type: AutoTokenizer +trust_remote_code: true # 
required for mpt as their model class is not merged into transformers yet +load_in_8bit: false +datasets: + - path: vicgalle/alpaca-gpt4 + type: alpaca +dataset_prepared_path: +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj +lora_fan_in_fan_out: false +wandb_project: mpt-alpaca-7b +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./mpt-alpaca-7b +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0000002 +train_on_inputs: false +group_by_length: false +bf16: auto +tf32: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 5 +xformers_attention: +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0001 +fsdp: +fsdp_config: +tokens: + pad_token: "<|padding|>" + bos_token: "<|endoftext|>" + eos_token: "<|endoftext|>" + unk_token: "<|endoftext|>" diff --git a/examples/openllama-3b/README.md b/examples/openllama-3b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e9501a54f61577ab8f7301b6caebc3d4802000c --- /dev/null +++ b/examples/openllama-3b/README.md @@ -0,0 +1,16 @@ +# openllama-3b + +Basic full tune +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/config.yml +``` + +LoRA +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/lora.yml +``` + +QLoRA +```shell +accelerate launch scripts/finetune.py examples/openllama-3b/qlora.yml +``` diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..0a404c79d85114359412622dbc642117a5fab7f7 --- /dev/null +++ b/examples/openllama-3b/config.yml @@ -0,0 +1,62 @@ +base_model: openlm-research/open_llama_3b_v2 +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: false +load_in_4bit: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 1024 +sample_packing: true +lora_r: +lora_alpha: +lora_dropout: +lora_target_modules: +lora_target_linear: +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./openllama-out +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.000003 +train_on_inputs: false +group_by_length: false +float16: true +bf16: false +fp16: false +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..b83b2db4e4aa03ffeb4b563fa82092cf72be960b --- /dev/null +++ b/examples/openllama-3b/lora.yml @@ -0,0 +1,68 @@ +base_model: openlm-research/open_llama_3b_v2 +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: true 
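+# 8-bit base weights for LoRA; the companion qlora.yml loads the base model in 4-bit instead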
+load_in_4bit: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.02 +adapter: lora +lora_model_dir: +sequence_len: 1024 +sample_packing: true +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.0 +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./lora-out +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: true +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +gptq_groupsize: +s2_attention: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..3d6218b308a3133b35a1507a04360f8ce094a204 --- /dev/null +++ b/examples/openllama-3b/qlora.yml @@ -0,0 +1,61 @@ +base_model: openlm-research/open_llama_3b_v2 +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: false +load_in_4bit: true +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: qlora +lora_model_dir: +sequence_len: 1024 +sample_packing: true +lora_r: 8 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./qlora-out +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: paged_adamw_32bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: true +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/examples/phi/README.md b/examples/phi/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1b9e8022e33fb9392bd1ce549d8c03892e5f22e7 --- /dev/null +++ b/examples/phi/README.md @@ -0,0 +1,11 @@ +# Phi + +Due to some nuances with the phi code, please use deepspeed when training phi for full finetune. 
+ +```shell +accelerate launch -m axolotl.cli.train examples/phi/phi-ft.yml --deepspeed deepspeed_configs/zero1.json + +# OR + +python -m axolotl.cli.train examples/phi/phi-qlora.yml +``` diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml new file mode 100644 index 0000000000000000000000000000000000000000..b21386f7077c10f78e5062a41f4e394a2ff85dac --- /dev/null +++ b/examples/phi/phi-ft.yml @@ -0,0 +1,71 @@ +base_model: microsoft/phi-1_5 +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: garage-bAInd/Open-Platypus + type: alpaca + +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./phi-sft-out + +sequence_len: 2048 +sample_packing: true +pad_to_sequence_len: true + +adapter: +lora_model_dir: +lora_r: +lora_alpha: +lora_dropout: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000003 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: True +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 100 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +resize_token_embeddings_to_32x: true +special_tokens: + pad_token: "<|endoftext|>" diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..d2b5d661c9cf6d6a403883b86a54a6a7017234b2 --- /dev/null +++ b/examples/phi/phi-qlora.yml @@ -0,0 +1,71 @@ +base_model: microsoft/phi-1_5 +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: garage-bAInd/Open-Platypus + type: alpaca + +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./phi-sft-out + +sequence_len: 2048 +sample_packing: true +pad_to_sequence_len: true + +adapter: qlora +lora_model_dir: +lora_r: 64 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000003 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: True +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 100 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +resize_token_embeddings_to_32x: true +special_tokens: + pad_token: "<|endoftext|>" diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml new file mode 100644 index 0000000000000000000000000000000000000000..7a2d05d0189e7f2f0da4d51ec672318d287a37fc --- /dev/null +++ b/examples/phi/phi2-ft.yml @@ -0,0 +1,71 @@ +base_model: microsoft/phi-2 +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +load_in_8bit: false 
+load_in_4bit: false +strict: false + +datasets: + - path: garage-bAInd/Open-Platypus + type: alpaca + +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./phi-sft-out + +sequence_len: 2048 +sample_packing: true +pad_to_sequence_len: true + +adapter: +lora_model_dir: +lora_r: +lora_alpha: +lora_dropout: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_torch +adam_beta2: 0.95 +adam_epsilon: 0.00001 +max_grad_norm: 1.0 +lr_scheduler: cosine +learning_rate: 0.000003 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: true + +gradient_checkpointing: true +gradient_checkpointing_kwargs: + use_reentrant: True +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 100 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +resize_token_embeddings_to_32x: true +special_tokens: + pad_token: "<|endoftext|>" diff --git a/examples/pythia-12b/README.md b/examples/pythia-12b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..123ffa7109a439845c88039cdcb3968da0b26a16 --- /dev/null +++ b/examples/pythia-12b/README.md @@ -0,0 +1,9 @@ +# Pythia 12B + +- Single-GPU A100 only (?) + +```shell +python scripts/finetune.py examples/pythia-12b/config.yml +``` + +⚠️ Multiple-GPU A100 - Doesn't seem to work with multi-gpu without causing OOM! ⚠️ diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..e44bba74512ae7be47acbe1fef3b47f3ad2e8af4 --- /dev/null +++ b/examples/pythia-12b/config.yml @@ -0,0 +1,48 @@ +base_model: EleutherAI/pythia-12b-deduped +base_model_ignore_patterns: pytorch* # prefer safetensors +model_type: GPTNeoXForCausalLM +tokenizer_type: AutoTokenizer +load_in_8bit: false +load_in_4bit: false +gptq: false +device_map: auto +datasets: + - path: vicgalle/alpaca-gpt4 + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: 2048 +lora_r: 64 +lora_alpha: 32 +lora_dropout: 0.0 +lora_target_modules: +lora_target_linear: true +lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./pythia-12b +gradient_accumulation_steps: 1 +micro_batch_size: 1 +num_epochs: 5 +learning_rate: 0.00003 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +train_on_inputs: false +group_by_length: false +bf16: false +fp16: false +float16: true +tf32: true +flash_optimum: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +gradient_checkpointing: true +fsdp: +fsdp_config: diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..7cb07fe2583dc0bcf16ab6fc4975c7b71e00f524 --- /dev/null +++ b/examples/pythia/lora.yml @@ -0,0 +1,37 @@ +base_model: EleutherAI/pythia-1.4b-deduped +load_in_8bit: true +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: lora +lora_model_dir: +sequence_len: 512 +lora_r: 16 +lora_alpha: 32 +lora_dropout: 0.05 +lora_target_modules: + - query_key_value +lora_target_linear: +lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific +wandb_project: +wandb_entity: 
+wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./lora-alpaca-pythia +gradient_accumulation_steps: 1 +micro_batch_size: 4 +num_epochs: 4 +learning_rate: 0.00001 +train_on_inputs: false +group_by_length: false +bf16: auto +tf32: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +weight_decay: 0.1 +evals_per_epoch: 4 +logging_steps: 1 diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..da4d784e0a0925c8968337ce905d8352c3c2c91f --- /dev/null +++ b/examples/qwen/lora.yml @@ -0,0 +1,67 @@ +base_model: Qwen/Qwen-7B +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +trust_remote_code: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 2048 # supports up to 8192 +sample_packing: false +pad_to_sequence_len: + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..501a866b2d872adc94c09b3c864fd31c3e5bb986 --- /dev/null +++ b/examples/qwen/qlora.yml @@ -0,0 +1,67 @@ +base_model: Qwen/Qwen-7B +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer + +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: true +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 2048 # supports up to 8192 +sample_packing: false +pad_to_sequence_len: + +adapter: qlora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/redpajama/README.md b/examples/redpajama/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bbd9a93ecc1a548de071250edd14209c8bfedc8f --- /dev/null +++ b/examples/redpajama/README.md @@ -0,0 +1,6 @@ +# RedPajama 3B preview release + +```shell +accelerate launch scripts/finetune.py examples/redpajama/config-3b.yml + +``` diff --git 
a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml new file mode 100644 index 0000000000000000000000000000000000000000..5a42e2a9520110882a9952cd7a6bfe68185f79d7 --- /dev/null +++ b/examples/redpajama/config-3b.yml @@ -0,0 +1,59 @@ +base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1 +model_type: GPTNeoXForCausalLM +tokenizer_type: AutoTokenizer +trust_remote_code: +load_in_8bit: false +datasets: + - path: vicgalle/alpaca-gpt4 + type: alpaca +dataset_prepared_path: +val_set_size: 0.02 +adapter: +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj +lora_fan_in_fan_out: false +wandb_project: redpajama-alpaca-3b +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./redpajama-alpaca-3b +batch_size: 4 +micro_batch_size: 1 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0000002 +train_on_inputs: false +group_by_length: false +bf16: auto +tf32: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 5 +xformers_attention: +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0001 +fsdp: +fsdp_config: +tokens: + pad_token: "<|padding|>" + bos_token: "<|endoftext|>" + eos_token: "<|endoftext|>" + unk_token: "<|endoftext|>" diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..bdfe1bd854bfcfcca571259c574e7088500c4cfb --- /dev/null +++ b/examples/replit-3b/config-lora.yml @@ -0,0 +1,55 @@ +base_model: replit/replit-code-v1-3b +trust_remote_code: true +load_in_8bit: false +datasets: + - path: vicgalle/alpaca-gpt4 + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +adapter: lora +lora_model_dir: +sequence_len: 2048 +max_packed_sequence_len: +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - Wqkv + - mlp_up + - mlp_down +lora_fan_in_fan_out: +wandb_project: lora-replit +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./lora-replit +batch_size: 8 +micro_batch_size: 1 +num_epochs: 4 +optimizer: +torchdistx_path: +lr_scheduler: +learning_rate: 0.00001 +train_on_inputs: false +group_by_length: false +bf16: auto +tf32: true +gradient_checkpointing: +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: +gptq_groupsize: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0 +fsdp: +fsdp_config: +#special_tokens: diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml new file mode 100644 index 0000000000000000000000000000000000000000..f3fc16f867bcc11d707820778d3bb500d3a2aed2 --- /dev/null +++ b/examples/stablelm-2/1.6b/fft.yml @@ -0,0 +1,69 @@ +base_model: stabilityai/stablelm-2-1_6b +model_type: AutoModelForCausalLM +tokenizer_type: AutoTokenizer +trust_remote_code: true + +load_in_8bit: false +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: last_run_prepared +val_set_size: 0.05 +output_dir: ./out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true + +adapter: +lora_model_dir: +lora_r: +lora_alpha: +lora_dropout: +lora_target_linear: +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: 
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+flash_attn_cross_entropy: false
+flash_attn_rms_norm: true
+flash_attn_fuse_qkv: false
+flash_attn_fuse_mlp: true
+
+warmup_steps: 100
+evals_per_epoch: 4
+eval_table_size:
+saves_per_epoch: 1
+debug:
+deepspeed: #deepspeed_configs/zero2.json # multi-gpu only
+weight_decay: 0.1
+fsdp:
+fsdp_config:
+special_tokens:
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
new file mode 100644
index 0000000000000000000000000000000000000000..c5051fab6e6e24134f01ee56b48b742c6e95995a
--- /dev/null
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -0,0 +1,66 @@
+base_model: stabilityai/stablelm-2-1_6b
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+trust_remote_code: true
+
+load_in_8bit: true
+load_in_4bit: false
+strict: false
+
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0.05
+output_dir: ./lora-out
+
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+
+adapter: lora
+lora_model_dir:
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 1
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+flash_attn_cross_entropy: false
+flash_attn_rms_norm: true
+
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
diff --git a/examples/stablelm-2/README.md b/examples/stablelm-2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..0cdc2ffb0e532dd573051066e37fad9af192b084
--- /dev/null
+++ b/examples/stablelm-2/README.md
@@ -0,0 +1,36 @@
+# StableLM 2
+
+This folder contains examples for training and processing using StableLM-2. It also includes a section to help you estimate the GPU requirements for your specific use case.
+
+## Estimating GPU Requirements
+
+| type          | deepspeed | batch size | context length | vRAM (GB) |
+|---------------|-----------|------------|----------------|-----------|
+| full finetune | N/A       | 1          | 4096           | ~21.5     |
+| full finetune | zero2     | 1          | 4096           | ~20       |
+| LoRA          | N/A       | 1          | 4096           | ~16.6     |
+
+The above are estimates and might differ slightly depending on your setup, for example whether or not you pack your sequences (the estimates above assume sample packing to a length of 4096).
+
+This blog post from Hamel Husain was a great resource for estimating these numbers: https://hamel.dev/notes/llm/03_estimating_vram.html
+
+## Training
+We have example configs here for both full finetuning and LoRA using the popular alpaca dataset:
+
+```shell
+# preprocess the dataset
+CUDA_VISIBLE_DEVICES="" python -m axolotl.cli.preprocess examples/stablelm-2/1.6b/lora.yml
+```
+
+Single GPU Training:
+```shell
+python -m axolotl.cli.train examples/stablelm-2/1.6b/fft.yml --deepspeed deepspeed_configs/zero2.json
+# OR
+python -m axolotl.cli.train examples/stablelm-2/1.6b/lora.yml
+```
+
+Multinode GPU Training with `accelerate`:
+```shell
+# make sure you've configured accelerate properly
+accelerate launch -m axolotl.cli.train examples/stablelm-2/1.6b/fft.yml --deepspeed deepspeed_configs/zero2.json
+```
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
new file mode 100644
index 0000000000000000000000000000000000000000..1efdfbc8e0905988936ef59c502c162ac8b2aae1
--- /dev/null
+++ b/examples/starcoder2/qlora.yml
@@ -0,0 +1,69 @@
+base_model: bigcode/starcoder2-3b
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+
+
+dataset_prepared_path:
+val_set_size: 0.2
+output_dir: ./qlora
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_run_id:
+wandb_log_model:
+
+gradient_accumulation_steps: 8
+micro_batch_size: 2
+num_epochs: 3
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 2e-5
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16: false
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 20
+evals_per_epoch: 4
+eval_steps:
+eval_table_size:
+saves_per_epoch: 4
+save_steps:
+save_total_limit: 2
+debug:
+deepspeed:
+weight_decay:
+fsdp:
+fsdp_config:
+special_tokens:
diff --git a/examples/tiny-llama/README.md b/examples/tiny-llama/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..467c06ec87a43569560264c549332dbc00b57b7a
--- /dev/null
+++ b/examples/tiny-llama/README.md
@@ -0,0 +1,17 @@
+# Overview
+
+This is a simple example of how to finetune TinyLlama-1.1B using either LoRA or QLoRA:
+
+LoRA:
+
+```
+accelerate launch -m axolotl.cli.train examples/tiny-llama/lora.yml
+```
+
+QLoRA:
+
+```
+accelerate launch -m axolotl.cli.train examples/tiny-llama/qlora.yml
+```
+
+Both take about 10 minutes to complete on a 4090.
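+
+Once a run finishes, you can sanity-check the result interactively. The snippet below is only a sketch: it assumes your axolotl install exposes the `axolotl.cli.inference` entry point and that the adapter was written to the `output_dir` configured in these YAMLs (`./lora-out` for LoRA, `./qlora-out` for QLoRA):
+
+```
+# sketch: load the base model plus the freshly trained adapter and prompt it
+# (assumes the axolotl.cli.inference entry point is available in your install)
+python -m axolotl.cli.inference examples/tiny-llama/lora.yml --lora_model_dir="./lora-out"
+```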
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml new file mode 100644 index 0000000000000000000000000000000000000000..fd7b02cacac16eaf2cabbac675adb38d38cceca7 --- /dev/null +++ b/examples/tiny-llama/lora-mps.yml @@ -0,0 +1,64 @@ +base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +pad_to_sequence_len: true +eval_sample_packing: false + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_torch +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: false +tf32: true + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 0 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml new file mode 100644 index 0000000000000000000000000000000000000000..4a16f14b92d794d26a41be7e88ca1fe760c9c0a5 --- /dev/null +++ b/examples/tiny-llama/lora.yml @@ -0,0 +1,64 @@ +base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +dataset_prepared_path: +val_set_size: 0.05 +output_dir: ./lora-out + +sequence_len: 4096 +sample_packing: true +eval_sample_packing: false +pad_to_sequence_len: true + +adapter: lora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: auto +fp16: +tf32: false + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true + +warmup_steps: 10 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml new file mode 100644 index 0000000000000000000000000000000000000000..3b68a7f5477bfc96b3e79f86f5b9d4063b18f4d3 --- /dev/null +++ b/examples/tiny-llama/pretrain.yml @@ -0,0 +1,58 @@ +base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 + +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: false +strict: false + +max_steps: 200 +pretraining_dataset: + path: c4 + name: en + type: pretrain +dataset_prepared_path: +val_set_size: 0.0 +output_dir: ./model-out + +sequence_len: 2048 +sample_packing: true + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 
+num_epochs: 4
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+evals_per_epoch:
+eval_table_size:
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
new file mode 100644
index 0000000000000000000000000000000000000000..3ea313c838cf69e930dcbe82e268efce049ffda3
--- /dev/null
+++ b/examples/tiny-llama/qlora.yml
@@ -0,0 +1,65 @@
+base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
+model_type: LlamaForCausalLM
+tokenizer_type: LlamaTokenizer
+
+load_in_8bit: false
+load_in_4bit: true
+strict: false
+
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0.05
+output_dir: ./qlora-out
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 4
+optimizer: paged_adamw_32bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e3faa01bdb79dd97abd816d2c8f9fd6b8bc3675c
--- /dev/null
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -0,0 +1,90 @@
+# An example finetuning Salesforce's XGen-7b model with 8k context using qlora
+# on Tim Dettmers' Guanaco dataset.
+base_model: Salesforce/xgen-7b-8k-base
+trust_remote_code: true
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+load_in_8bit: false
+# enable 4bit for QLoRA
+load_in_4bit: true
+gptq: false
+strict: false
+push_dataset_to_hub:
+datasets:
+  - path: timdettmers/openassistant-guanaco
+    data_files:
+      - openassistant_best_replies_train.jsonl
+    type: "completion"
+dataset_prepared_path:
+val_set_size: 0.05
+# enable QLoRA
+adapter: qlora
+lora_model_dir:
+sequence_len: 8192
+max_packed_sequence_len:
+
+# hyperparameters from QLoRA paper Appendix B.2
+# "We find hyperparameters to be largely robust across datasets"
+lora_r: 64
+lora_alpha: 16
+# 0.1 for models up to 13B
+# 0.05 for 33B and 65B models
+lora_dropout: 0.05
+# add LoRA modules on all linear layers of the base model
+lora_target_modules:
+lora_target_linear: true
+lora_fan_in_fan_out:
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+output_dir: ./qlora-out
+
+# QLoRA paper Table 9
+# - 16 for 7b & 13b
+# - 32 for 33b, 64 for 65b
+# Max size tested on A6000
+# - 7b: 40
+# - 40b: 4
+# decrease if OOM, increase for max VRAM utilization
+micro_batch_size: 1
+gradient_accumulation_steps: 1
+num_epochs: 4
+# Optimizer for QLoRA
+optimizer: paged_adamw_32bit
+torchdistx_path:
+lr_scheduler: cosine
+# QLoRA paper Table 9
+# - 2e-4 for 7b & 13b
+# - 1e-4 for 33b & 65b
+learning_rate: 0.00002
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+gradient_checkpointing: true
+# stop training after this many evaluation losses have increased in a row
+# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
+early_stopping_patience: 3
+resume_from_checkpoint:
+auto_resume_from_checkpoints: true
+local_rank:
+logging_steps: 1
+xformers_attention: true
+flash_attention:
+gptq_groupsize:
+gptq_model_v1:
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+special_tokens:
+  eos_token: "<|endoftext|>"
+  bos_token: "<|endoftext|>"
+  unk_token: "<|endoftext|>"
+  pad_token: "<|endoftext|>"
diff --git a/examples/yi-34B-chat/README.md b/examples/yi-34B-chat/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..07078850fbb9454b270253765e631bfcd78cf988
--- /dev/null
+++ b/examples/yi-34B-chat/README.md
@@ -0,0 +1,5 @@
+# Overview
+
+This is an example of a Yi-34B-Chat configuration. It demonstrates that it is possible to finetune a 34B model on a GPU with 24GB of VRAM.
+
+Tested on an RTX 4090 with `python -m axolotl.cli.train examples/yi-34B-chat/qlora.yml`: a single epoch of QLoRA finetuning on the alpaca dataset runs in about 47 minutes and uses roughly 97% of the available memory.
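+
+If you want a standalone checkpoint rather than a base model plus adapter, the QLoRA weights can be merged back into the base model after training. The snippet below is only a sketch: it assumes your axolotl version ships the `axolotl.cli.merge_lora` entry point and that the adapter landed in `./qlora-out` as configured in the YAML that follows; note that merging a 34B model needs considerably more system RAM than the 24GB of VRAM used for training.
+
+```shell
+# sketch: merge the trained QLoRA adapter into the base Yi-34B-Chat weights
+# (assumes the axolotl.cli.merge_lora entry point is available in your install)
+python -m axolotl.cli.merge_lora examples/yi-34B-chat/qlora.yml --lora_model_dir="./qlora-out"
+```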
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml new file mode 100644 index 0000000000000000000000000000000000000000..dc8c37d18796a13d17bda1aa2974224d2a0af15a --- /dev/null +++ b/examples/yi-34B-chat/qlora.yml @@ -0,0 +1,75 @@ +base_model: 01-ai/Yi-34B-Chat +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer + +load_in_8bit: false +load_in_4bit: true +strict: false +sequence_len: 1024 +bf16: auto +fp16: +tf32: false +flash_attention: true +special_tokens: + bos_token: "<|startoftext|>" + eos_token: "<|endoftext|>" + unk_token: "" + +# Data +datasets: + - path: mhenrichsen/alpaca_2k_test + type: alpaca +warmup_steps: 10 + +# Iterations +num_epochs: 1 + +# Evaluation +val_set_size: 0.1 +evals_per_epoch: 5 +eval_table_size: +eval_max_new_tokens: 128 +eval_sample_packing: false +eval_batch_size: 1 + +# LoRA +output_dir: ./qlora-out +adapter: qlora +lora_model_dir: +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_fan_in_fan_out: +lora_target_modules: + +# Sampling +sample_packing: false +pad_to_sequence_len: false + +# Batching +gradient_accumulation_steps: 4 +micro_batch_size: 1 +gradient_checkpointing: true + +# wandb +wandb_project: + +# Optimizer +optimizer: paged_adamw_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +# Misc +train_on_inputs: false +group_by_length: false +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +debug: +deepspeed: +weight_decay: 0 +fsdp: +fsdp_config: diff --git a/favicon.jpg b/favicon.jpg new file mode 100644 index 0000000000000000000000000000000000000000..43c69024430555f849ee0077664f194dae8d912f Binary files /dev/null and b/favicon.jpg differ diff --git a/git-lfs-3.2.0/CHANGELOG.md b/git-lfs-3.2.0/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..d941d6544f60e2be15ff346512e669c9dbb69afc --- /dev/null +++ b/git-lfs-3.2.0/CHANGELOG.md @@ -0,0 +1,2225 @@ +# Git LFS Changelog + +## 3.2.0 (25 May 2022) + +This release is a feature release which includes support for machine-readable +formats for a couple more commands, plus the ability to automatically merge +LFS-based text files from the command-line. It's likely that the merge driver +will see future improvements, but if you have feedback on the design, please use +the discussions feature. + +Note that our binary release archives no longer unpack into the current +directory, and now contain a top-level directory just like the source archives +do. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @bbodenmiller for fixing the formatting in our manual pages +* @breyed for fixing a typo in our manual pages +* @btoll for improving our README +* @rcoup for fixing our Accept header syntax +* @vtbassmatt for documenting our deprecation of NTLM + +### Features + +* ls-files: add a --json option #5007 (@bk2204) +* Add --json output for git lfs track #5006 (@bk2204) +* Add a merge driver #4970 (@bk2204) +* lfs: don't write hooks when they haven't changed #4935 (@bk2204) +* Tarballs, not tarbombs #4980 (@bk2204) + +### Bugs + +* Apply several Makefile fixes for Windows #5016 (@bk2204) +* git: don't panic on pkt-line without equals #4982 (@bk2204) +* lfshttp: fix invalid Accept header syntax #4996 (@rcoup) +* Grammar fix #4981 (@breyed) +* Use `gitignore`-style path matching for additional commands #4951 (@chrisd8088) +* Avoid pruning when identical files both match and do not match `lfs.fetchexclude` #4973 (@chrisd8088) +* Apply `lfs.fetchexclude` filter to previous commits when pruning #4968 (@chrisd8088) +* Update and correct several error message strings #4943 (@chrisd8088) +* script/upload: correct RHEL 8 package repo #4936 (@bk2204) +* lfs: add old hook content to the list of old hooks #4878 (@bk2204) +* .github/workflows: install packagecloud gem #4873 (@bk2204) + +### Misc + +* Update distros for packagecloud.io #5010 (@bk2204) +* lfshttp: log the Negotiate error on failure #5000 (@bk2204) +* Build CI on Windows 2022 #4997 (@chrisd8088) +* workflows: use ronn-ng #4992 (@bk2204) +* Multiple hash support #4971 (@bk2204) +* note deprecation of NTLM #4987 (@vtbassmatt) +* Update to Go 1.18, drop older Go version support, and update modules and dependencies #4963 (@chrisd8088) +* Update tests to check `prune` command excludes `lfs.fetchexclude` paths #4964 (@chrisd8088) +* Add test to check `prune` command retains tagged unpushed objects #4962 (@chrisd8088) +* Adjust test helpers and tests related to path filtering #4960 (@chrisd8088) +* Include shell path in restricted `PATH` in credential helper path test #4959 (@chrisd8088) +* Build test helper commands with `.exe` file extension on Windows #4954 (@chrisd8088) +* Update Windows signing certificate SHA hash in `Makefile` #4946 (@chrisd8088) +* remove unused `Pipe[Media]Command()` functions #4942 (@chrisd8088) +* Makefile: remove legacy trimpath code #4938 (@bk2204) +* add Inno Setup check of Git install paths and remove old uninstaller checks #4925 (@chrisd8088) +* note `git lfs push --all` only pushes local refs in man page #4898 (@chrisd8088) +* Build man pages into per-section subdirectories #4890 (@chrisd8088) +* Call out destructive command in README #4880 (@btoll) +* Improve formatting #4863 (@bbodenmiller) +* docs/howto: remind core team member to check Actions workflows #4868 (@bk2204) +* .github: fix syntax error in release workflow #4866 (@bk2204) + +## 3.1.4 (19 Apr 2022) + +This release is a bugfix release to fix some problems during the build of +v3.1.3. There are otherwise no substantial changes from v3.1.3. + +### Misc + +* Use only Windows Server 2019 runners for CI in GitHub Actions #4883 (@chrisd8088) +* remove unused `Pipe[Media]Command()` functions #4942 (@chrisd8088) + +## 3.1.3 (19 Apr 2022) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2022-24826. 
+ +On Windows, if Git LFS operates on a malicious repository with a `..exe` file as +well as a file named `git.exe`, and `git.exe` is not found in PATH, the `..exe` +program will be executed, permitting the attacker to execute arbitrary code. +Similarly, if the malicious repository contains files named `..exe` and +`cygpath.exe`, and `cygpath.exe` is not found in PATH, the `..exe` program will +be executed when certain Git LFS commands are run. + +This security problem does not affect Unix systems. This is the same issue as +CVE-2020-27955 and CVE-2021-21237, but the fix for those issue was incomplete +and certain options can still cause the problem to occur. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator, and it +continues to search for programs even when the specified program name is empty. +This has been solved by failing if the path is empty or not found. + +We would like to extend a special thanks to the following open-source +contributors: + +* @yuske for reporting this to us responsibly + +### Bugs + +* Report errors when finding executables and revise PATH search tests (@chrisd8088) + +### Misc + +* Update Windows signing certificate SHA hash in Makefile (@chrisd8088) + +## 3.1.2 (16 Feb 2022) + +This is a bugfix release which fixes a bug in `git lfs install` and some issues +in our CI release processes, including one that prevented arm64 packages for +Debian 11 from being uploaded. + +### Bugs + +* lfs: add old hook content to the list of old hooks #4878 (@bk2204) + +### Misc + +* Revert "Merge pull request #4795 from bk2204/actions-checkout-v2" #4877 (@bk2204) +* .github/workflows: install packagecloud gem #4873 (@bk2204) + +## 3.1.1 (14 Feb 2022) + +This is a bugfix release which fixes a syntax error in the release workflow. + +### Misc + +* .github: fix syntax error in release workflow #4866 (@bk2204) + +## 3.1.0 (14 Feb 2022) + +This release is a feature release which includes support for fallback from +Negotiate to Basic authentication, new ARM64 packages for Debian 11, a new +localization infrastructure, and improved netrc support, in addition to various +bug fixes. In addition, we've addressed a performance regression for `git lfs +migrate import` that was introduced in v3.0.2. + +At the moment, there are no translations available, but if you are interested in +contributing to one, please reach out in an issue. For compatibility with +Windows and to retain the ability to have a single relocatable binary, the +translations are included in the binary at build time. 
+ +We would like to extend a special thanks to the following open source +contributors: + +* @donno2048 for improving our error checking +* @howardlyliao for improved netrc support +* @HermannDppes for improving our large file warning on Windows +* @rex4539 for fixing various typos throughout our codebase + +### Features + +* Fall back from Negotiate to Basic #4815 (@bk2204) +* Add basic support for localization #4729 (@bk2204) +* Add support for ARM64 Debian packages #4728 (@bk2204) +* netrc: consider same machine may have different login names #4726 (@howardlyliao) + +### Bugs + +* smudge: honor GIT_LFS_SKIP_SMUDGE with checkout-index #4860 (@bk2204) +* fix `git lfs fsck --objects A..B` handling and drop all left/right ref terminology #4859 (@chrisd8088) +* halt migration when `.gitattributes` symbolic link encountered #4849 (@chrisd8088) +* fix merging of `.gitattributes` with execute file mode during migration #4835 (@chrisd8088) +* Fix migrate import speed regression #4813 (@bk2204) +* Fix typos #4806 (@rex4539) +* Move `err` checking to before the value was used #4776 (@donno2048) +* migrate import: don't allow path filters with --above #4771 (@bk2204) +* avoid panic on checkout with `--to` but no path, and update checkout manual #4766 (@chrisd8088) +* creds: gracefully handle lack of askpass helper #4759 (@bk2204) +* post-checkout: don't modify permissions of untracked files #4760 (@bk2204) +* use gitattributes filepath matching for migrate filter options #4758 (@chrisd8088) +* Avoid errors in git lfs env #4713 (@bk2204) +* fs: specify a file as existing if it's empty #4654 (@bk2204) +* Fix bound for largefilewarning #4633 (@HermannDppes) + +### Misc + +* build missing man pages and correct HTML renderings #4851 (@chrisd8088) +* Update and mark message strings for translation #4846 (@chrisd8088) +* Mark almost all strings for translation #4781 (@bk2204) +* .github/workflows: switch to actions/checkout@v2 #4795 (@bk2204) +* script/packagecloud: update for latest distros #4794 (@bk2204) +* filter-process: don't print large file warning on fixed versions #4768 (@bk2204) +* ssh: avoid using -- where possible #4741 (@bk2204) +* vendor,go.*: update x/crypto and dependencies #4738 (@chrisd8088) +* Stop supporting Go older than 1.13 #4641 (@bk2204) + +## 3.0.2 (28 Oct 2021) + +This release is a bugfix release which fixes a variety of problems seen since +3.0.0, including problems with empty files, `git lfs fsck --pointers`, and +the testsuite. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @fh1ch for patches to make things work better on Alpine Linux +* @pyckle for fixing our handling of filenames in `git lfs migrate import` +* @ycongal-smile for fixing `git lfs migrate import` with similarly named files + +### Bugs + +* Fix two types of misdetection in git lfs fsck #4697 (@bk2204) +* lfs: don't flag non-LFS files as invalid pointers #4691 (@bk2204) +* git: honor GIT_OBJECT_DIRECTORY #4686 (@bk2204) +* migrate: properly escape blob filenames #4683 (@pyckle) +* ls-files: don't process empty files as pointers #4681 (@bk2204) +* Call migrate() BlobFn on every blob #4671 (@ycongal-smile) +* Correct t-lock regular expression to be musl compatible #4673 (@fh1ch) + +### Misc + +* Allow git-lfs-transfer integration tests to be skipped #4677 (@fh1ch) +* Make CI environment GIT prefix grep more specific #4678 (@fh1ch) + +## 3.0.1 (28 Sep 2021) + +This release is a bugfix release which fixes the Windows ARM64 build process and +addresses a regression in support for empty files in pull and fetch. + +We would like to extend a special thanks to the following open-source +contributors: + +* @dennisameling for fixing support for Windows on ARM64 + +### Bugs + +* Fix Windows arm64 release #4647 (@dennisameling) +* fs: specify a file as existing if it's empty #4654 (@bk2204) + +## 3.0.0 (24 Sep 2021) + +This release is a major new release and introduces several new features, such as +a pure SSH-based protocol, packages for several new OS versions, support for +ARM64 Windows, Git-compatible pattern matching, and locking multiple files on +the command line, among other items. + +When connecting over SSH, the first attempt will be made to use +`git-lfs-transfer`, the pure SSH protocol, and if it fails, Git LFS will fall +back to the hybrid protocol using `git-lfs-authenticate`. Note that no major +forges are known to support the pure SSH protocol at this time. + +Because it is a major release, we've made some backwards-incompatible changes. +A (possibly incomplete) list of them is as follows: + +* NTLM support has been completely removed, since nobody volunteered to fix + issues in it. Users are advised to use Kerberos or Basic authentication + instead. +* When using an SSH URL (that is, the syntax starting with `ssh://`), the + leading slash is not stripped off when invoking `git-lfs-authenticate` or + `git-lfs-transfer`. This is compatible with the behavior of Git when invoking + commands over SSH. +* `git lfs fsck` now additionally checks that pointers are canonical and that + files that are supposed to be LFS files actually are. It also exits nonzero + if any problem is found. +* Pattern matching should be stricter and should either match the behavior of + `.gitattributes` or `.gitignore`, as appropriate. Deviations from Git's + behavior will henceforth be treated as bugs and fixed accordingly. +* Git LFS will now write a Git LFS repository format version into the + repository. This is designed to allow future extension with incompatible + changes. Repositories without this version will be assumed to be version 0. + Note that this is different from, but modeled on, Git's repository format + version. +* `git lfs lock` and `git lfs unlock` now handle multiple pathname arguments and + the JSON format has changed to handle multiple responses. +* The Go package name now contains a version number. This should have no effect + on users because we don't provide a stable Go ABI. 
+* Empty components in `PATH` are no longer treated as the current directory on + Windows because unintentionally having such empty components is common and the + behavior was surprising. + +We would like to extend a special thanks to the following open-source +contributors: + +* @codykrieger for ensuring that we process includes correctly +* @corngood for fixing a hang in prune +* @dennisameling for adding support for Windows on ARM64 +* @fh1ch for fixing our 429 handling +* @geki-yaba for fixing problems with askpass on Cygwin +* @gison93 for fixing a bug in our documentation +* @jvimr for ensuring our Debian packages are built properly +* @opohorel for ensuring our copyright notices were up to date +* @rhansen for fixing systems where / is a repository +* @sergiou87 for improving support for cross builds +* @slonopotamus for improving our error handling +* @stanhu for improving our handling of invalid OIDs +* @Timmmm for improving our support of .lfsconfig +* @tklauser for avoiding the need for cgo on macOS + +### Features + +* Advertise hash algorithm supported in batch request #4624 (@bk2204) +* Bump package version to v3 #4611 (@bk2204) +* Update OS versions #4610 (@bk2204) +* Add support for Debian 11 #4592 (@bk2204) +* Support for locking and unlocking multiple files #4604 (@bk2204) +* Add support for Windows ARM64 #4586 (@dennisameling) +* LFS repository format version #4552 (@bk2204) +* Pure SSH-based protocol #4446 (@bk2204) +* Make fsck able to check for invalid pointers #4525 (@bk2204) +* Add --fixup option to migrate info command #4501 (@chrisd8088) +* Allow reporting of LFS pointers separately in migrate info command #4436 (@chrisd8088) +* Add config variables for default remotes #4469 (@bk2204) +* Make lfshttp package builds more portable #4476 (@bk2204) +* Mark skipdownloaderrors as safe #4468 (@Timmmm) +* Make migrate commands default to preserving uncommitted changes #4454 (@chrisd8088) +* Darwin ARM64 support #4437 (@bk2204) +* tools: implement cloneFileSyscall on darwin without cgo #4387 (@tklauser) +* prune: add options to be more aggressive about pruning #4368 (@bk2204) + +### Bugs + +* corrected debian 11 & 12 derived variants #4622 (@jvimr) +* urlconfig: anchor regexp for key matching #4598 (@bk2204) +* filepathfilter: always use Git-compatible pattern matching #4556 (@bk2204) +* debian and rpm: Pass `--skip-repo` to `install` and `uninstall` #4594 (@rhansen) +* Fix hang in prune #4557 (@corngood) +* Disable ANSI color codes while log parsing and anchor diff regular expressions #4585 (@chrisd8088) +* Fix 429 retry-after handling for LFS batch API endpoint #4573 (@fh1ch) +* go.mod: bump gitobj to v2.0.2 #4555 (@bk2204) +* Fix locking with multiple paths and absolute paths #4535 (@bk2204) +* locking: avoid nil pointer dereference with invalid response #4509 (@bk2204) +* migrate import: make --above affect only individual files #4512 (@bk2204) +* fs: be a little less aggressive with cleanup #4490 (@bk2204) +* Fix downloadFile in gitfilter_smudge.go to actually propagate all errors #4478 (@slonopotamus) +* Translate Cygwin path patches for askpass helper and cert dir/file #4473 (@geki-yaba) +* Avoid panic on SIGINT by skipping cleanup when config uninitialized #4463 (@chrisd8088) +* Parse stash log entries parsimonously in prune command #4449 (@chrisd8088) +* docs: note that -I and -X override configuration settings #4442 (@bk2204) +* Make all checks of blobSizeCutoff consistent #4435 (@chrisd8088) +* Fix up handling of the "migrate info" command's --top option #4434 
(@chrisd8088) +* Tighten LFS pointer regexp #4421 (@stanhu) +* invoke git-config with --includes to ensure it always evaluates `include.*` directives #4420 (@codykrieger) +* Canonicalize Windows paths like Git does #4418 (@bk2204) +* lfsapi: don't warn about duplicate but identical aliases #4413 (@bk2204) +* lfs: don't invoke diff drivers when pruning repositories #4407 (@bk2204) +* Consider scheme of request URL, not proxy URL, when choosing proxy #4396 (@bk2204) +* Makefile: allow make release to be run twice in a row #4344 (@bk2204) +* Makefile: don't fail the second time macOS builds are built #4341 (@bk2204) + +### Misc + +* subprocess: don't treat empty PATH component as . on Windows #4603 (@bk2204) +* Switch from which to command -v #4591 (@bk2204) +* Bump Go to 1.17 #4584 (@dennisameling) +* Add cautions about unstable Go API and fix GPG key link #4582 (@chrisd8088) +* Update go.mod module path with explicit v2 #4575 (@chrisd8088) +* Drop unused ClearTempStorage() transfer adapter method and tune stale comments #4554 (@chrisd8088) +* README: improve steps for building from source #4527 (@bk2204) +* Update license year #4513 (@opohorel) +* docs/man: add note re post-import use of checkout #4504 (@chrisd8088) +* Bump transitive dependencies #4502 (@bk2204) +* script/packagecloud: update distros #4494 (@bk2204) +* Use host architecture and OS when running `go generate` #4492 (@sergiou87) +* Bump go-spnego to the latest version #4482 (@bk2204) +* Update git-lfs-migrate man page and add description section #4458 (@chrisd8088) +* update x/text and dependencies #4455 (@opohorel) +* Use blobSizeCutoff in clean pointer buffer length check #4433 (@chrisd8088) +* tools: unset XDG_CONFIG_HOME for filetools test #4432 (@chrisd8088) +* vendor,go.{mod,sum}: update x/net and dependencies #4398 (@chrisd8088) +* Remove NTLM #4384 (@bk2204) +* gitobj 2.0.1 #4348 (@bk2204) +* Fix numbered list in git lfs examples #4347 (@gison93) +* Add test for download gzip transport compression #4345 (@bk2204) + +## 2.13.3 (26 Mar 2021) + +This release fixes two bugs that caused `git lfs prune` to hang, updates some +dependencies to versions which lack a security issue (which did not affect Git +LFS), and adds support for ARM64 builds on macOS. + +### Bugs + +* lfs: don't invoke diff drivers when pruning repositories #4407 (@bk2204) +* Parse stash log entries parsimonously in prune command #4449 (@chrisd8088) + +### Misc + +* Darwin ARM64 support #4437 (@bk2204) +* vendor,go.{mod,sum}: update x/net and dependencies #4398 (@chrisd8088) + +## 2.13.2 (13 Jan 2021) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2021-21237. + +On Windows, if Git LFS operates on a malicious repository with a git.bat or +git.exe file in the current directory, that program is executed, permitting the +attacker to execute arbitrary code. This security problem does not affect Unix +systems. This is the same issue as CVE-2020-27955, but the fix for that issue +was incomplete and certain options can still cause the problem to occur. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator. This has +been solved by always using PATH to pre-resolve paths before handing them to Go. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @Ry0taK for reporting this to us responsibly + +### Bugs + +* Use subprocess for invoking all commands (@bk2204) + +## 2.13.1 (11 Dec 2020) + +This release fixes a bug in our build tooling that prevents our release process +from working properly. This release is otherwise identical to 2.13.0. + +### Misc + +* Makefile: don't fail the second time macOS builds are built #4341 (@bk2204) + +## 2.13.0 (10 Dec 2020) + +This release introduces several new features, such as the `--above` option to +`git lfs migrate import` and support for `socks5h` proxies. In addition, many +bugs have been fixed and several miscellaneous fixes have been included. + +Unless someone steps up to fix and maintain NTLM support, this will be the last +Git LFS release to support NTLM. See #4247 for more details. Note that Git LFS +supports Kerberos as well, which is far more secure and may be a viable +replacement in many situations. + +We would like to extend a special thanks to the following open-source +contributors: + +* @EliRibble for adding support for the `--above` option to `git lfs migrate import` +* @andrewshadura for adding support for the `GIT_LFS_SKIP_PUSH` environment variable +* @sinbad for fixing problems with retaining objects used by stashes +* @tklauser for cleaning up our use of error constants in the code + +### Features + +* Add --above parameter to 'migrate import'. #4276 (@EliRibble) +* Add GIT_LFS_SKIP_PUSH to allow skipping the pre-push hook #4202 (@andrewshadura) +* lfshttp: add support for socks5h proxies #4259 (@bk2204) +* Add manual pages to release assets #4230 (@bk2204) +* Honor GIT_WORK_TREE #4269 (@bk2204) + +### Bugs + +* Make git lfs migrate import handle missing extensions #4318 (@bk2204) +* fs: don't panic when using a too-short object ID to push #4307 (@bk2204) +* Fix pattern matching for .gitattributes #4301 (@bk2204) +* config: map missing port to default for HTTP key lookups #4282 (@bk2204) +* tools: use IoctlFileClone from golang.org/x/sys/unix #4261 (@tklauser) +* tools/util_darwin.go: Remove use of direct syscalls #4251 (@stanhu) +* tools: always force a UTF-8 locale for cygpath #4231 (@bk2204) +* prune: fix deleting objects referred to by stashes #4209 (@sinbad) + +### Misc + +* migrate import: warn about refs on case insensitive file systems #4332 (@larsxschneider) +* Drop obsolete OS support #4328 (@bk2204) +* tools: use ERROR_SHARING_VIOLATION const from golang.org/x/sys/windows #4291 (@tklauser) +* pull: gracefully handle merge conflicts #4289 (@bk2204) +* script/upload: avoid using Ruby's URI.escape #4266 (@bk2204) +* add documentation of security bug report process #4244 (@chrisd8088) + +## 2.12.1 (4 Nov 2020) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2020-27955. + +On Windows, if Git LFS operates on a malicious repository with a git.bat or +git.exe file in the current directory, that program is executed, permitting the +attacker to execute arbitrary code. This security problem does not affect Unix +systems. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator. This has +been solved by always using PATH to pre-resolve paths before handing them to Go. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @dawidgolunski for reporting this to us responsibly + +### Bugs + +* subprocess: avoid using relative program names (@bk2204) + +## 2.12.0 (1 Sep 2020) + +This release introduces several new features, such as support for the SHA-256 +repositories coming in a future version of Git, restored support for Go 1.11, +the ability to read the contents of .lfsconfig from the repository, signed and +notarized binaries on macOS, and pre-built 32-bit ARM binaries on Linux. In +addition, several bugs have been fixed and miscellaneous fixes included. + +Note that macOS releases are now shipped as zip files, not tarballs, since it is +not possible to notarize tarballs. macOS releases are now also built on macOS, +so `git lfs dedup` should now function. + +We would like to extend a special thanks to the following open-source +contributors: + +* @saracen for adding support for ARM binaries +* @mversluys for improving locking support +* @cccfeng for updating our documentation to make it more readable +* @bluekeyes for improving performance and tracing +* @gertcuykens for adding missing parts of our documentation + +### Features + +* config: optionally read .lfsconfig from the repository #4200 (@bk2204) +* Support SHA-256 repositories #4186 (@bk2204) +* allow Go 1.11 builds by using WaitStatus.ExitStatus() #4183 (@chrisd8088) +* add --worktree option to install and uninstall commands #4159 (@chrisd8088) +* Sign and notarize binaries on macOS #4143 (@bk2204) +* Makefile: add linux arm build and release targets #4126 (@saracen) +* Allow locking and unlocking non-existent files #3992 (@mversluys) + +### Bugs + +* docs/api/locking: add an explicit
#4208 (@cccfeng) +* Fix hang when the user lacks permissions #4207 (@bk2204) +* Don't mark unlocked files that aren't lockable as read-only #4171 (@bk2204) +* locking: make patterns with slashes work on Windows #4139 (@bk2204) +* git: consider full refspec when determining seen refs #4133 (@bk2204) + +### Misc + +* Fix Windows CI #4199 (@bk2204) +* Fix testsuite when working with non-master default branch #4174 (@bk2204) +* git: improve performance of remote ref listing #4176 (@bluekeyes) +* subprocess: trace all command execution #4175 (@bluekeyes) +* Update git-lfs-migrate.1.ronn #3869 (@gertcuykens) +* t: use repo v1 with extensions #4177 (@bk2204) +* Makefile: ensure temp Go modules can be deleted #4157 (@chrisd8088) +* Improve test suite robustness via environment #4132 (@bk2204) + +## 2.11.0 (8 May 2020) + +This release introduces several new features, such as better support for unnamed +local paths and URLs as remotes, support for `submodule.recurse`, exponential +backoff on failure, and support for renegotiation. In addition, numerous bugs +have been fixed and miscellaneous issues have been addressed. + +We would like to extend a special thanks to the following open-source +contributors: + +* @bluekeyes for adding support for exponential backoff +* @pluehne for adding support for `submodule.recurse` +* @Electric26 for fixing the default behavior of a prompt +* @nataliechen1 for fixing certain upload retry failures +* @shalashik for fixing a panic during cherry-pick +* @swisspol for updating our documentation to reflect supported `.lfsconfig` + keys +* @dan2468 for updating the copyright year + +### Features + +* Allow literal local paths as remotes #4119 (@bk2204) +* pre-push: find named remote for URL if possible #4103 (@bk2204) +* tq: add exponential backoff for retries #4097 (@bluekeyes) +* migrate import: set text to unspecified for excluded fields #4068 (@bk2204) +* Update list of distros for packagecloud.io #4080 (@bk2204) +* lfshttp: allow renegotiation #4066 (@bk2204) +* Support submodule.recurse = true #4063 (@pluehne) +* add man page for the post-commit hook command #4052 (@chrisd8088) +* Add an option to control warning about files larger than 4 GiB #4009 (@bk2204) + +### Bugs + +* commands/command_migrate.go: fix bug #4116 (@Electric26) +* git: avoid "bad object" messages when force-pushing #4102 (@bk2204) +* git: avoid trying to rewrite remote tags as remote branches #4096 (@bk2204) +* make Go tests run consistently using local binary #4084 (@chrisd8088) +* commands: don't honor lfs.fetch* for ls-files #4083 (@bk2204) +* commands: print help output with --help #4059 (@bk2204) +* fail dedup command with explanation when LFS extensions configured #4045 (@chrisd8088) +* fix upload retry 'file already closed' issue' #4042 (@nataliechen1) +* commands/command_filter_process: cherry-pick of several commits cause panic error #4017 (@shalashik) +* Check error when creating local storage directory #4016 (@bk2204) +* track: detect duplicate patterns with --filename #4000 (@bk2204) + +### Misc + +* Removed lfs.extension.* from list of supported keys for .lfsconfig #4044 (@swisspol) +* Tidy modules #4035 (@bk2204) +* README: explain how to verify releases #4022 (@bk2204) +* docs: document git lfs migrate --yes #4023 (@bk2204) +* Stop using cgo on amd64 Linux #4026 (@bk2204) +* updated copyright year #3995 (@dan2468) + +## 2.10.0 (21 January 2020) + +This release introduces several new features, such as support for local paths in +remotes, Kerberos support, and official binaries for 
S390x and little-endian +64-bit PowerPC systems. In addition, numerous bugs have been fixed and +miscellaneous issues have been addressed. + +We would like to extend a special thanks to the following open-source +contributors: + +* @ganadist for fixing a bug in the output of `git lfs env` +* @exceed-alae for fixing a possible nil pointer dereference +* @slonopotamus for improvements to Windows support and code cleanups +* @nataliechen1 for fixing a data race +* @ssgelm for writing and updating the code to use a new cookie jar parser +* @austintraver for improving the output of `git lfs status` +* @nikola-sh for improving option parity with Git +* @alrs for fixing several error checks in the testsuite +* @pluehne for improving our support for uncommon references + +### Features + +* Optimize pushes for multiple refs #3978 (@bk2204) +* Include ppc64le and s390x Linux builds in releases #3983 (@bk2204) +* Kerberos (SPNEGO) support for HTTP #3941 (@bk2204) +* Add support for local paths #3918 (@bk2204) +* Allow specifying HTTP version to use #3887 (@bk2204) + +### Bugs + +* t-duplicate-oids: use correct awk indexing #3981 (@bk2204) +* Improved proxy support #3972 (@bk2204) +* install: don't print error if run outside repository #3969 (@bk2204) +* debian: bump version of golang-go #3959 (@bk2204) +* lfshttp: Set valid default value for lfs.concurrenttransfers #3949 (@ganadist) +* Add nil-check on defer block of DoTransfer() #3936 (@exceed-alae) +* Retry batch failures #3930 (@bk2204) +* rpm: use old setup code on CentOS 7 #3938 (@bk2204) +* Interpret relative hook paths as relative to working tree #3926 (@bk2204) +* Handle missing cygpath gracefully #3910 (@bk2204) +* Update index before showing status #3921 (@bk2204) +* Honor lfs.url when deciding on transfer adapters #3905 (@bk2204) +* Implement retry logic to fix LFS storage race conditions on Windows #3890 (@slonopotamus) +* Avoid hang when using git hash-object --stdin --path #3902 (@bk2204) +* synchronize access to netrcCredentialHelper.skip #3896 (@nataliechen1) + +### Misc + +* Improve license files #3973 (@bk2204) +* Add CI link to CI badge in README #3960 (@slonopotamus) +* Clarify output shown by `git lfs status` #3953 (@austintraver) +* Revert "ci: force Windows Git version to 2.22.0" #3903 (@bk2204) +* Better document pointer format constraints #3944 (@bk2204) +* Don't abort with newer Git when in a bare repo #3940 (@bk2204) +* Fix more Linux package issues #3932 (@bk2204) +* docs: explain shell metacharacters better #3920 (@bk2204) +* Reset the text attribute on export #3913 (@bk2204) +* Support schannel ssl backend #3868 (@nikola-sh) +* Allow migrate export to handle non-pointer files gracefully #3911 (@bk2204) +* git/gitattr: fix dropped test errors #3904 (@alrs) +* Accept all local references with git lfs push #3876 (@pluehne) +* Drop pre-1.6 Go compatibility code #3897 (@slonopotamus) +* tools/kv: Fix dropped test error #3882 (@alrs) +* Use different parser for cookiejar files #3886 (@ssgelm) +* Stop replacing files in LFS storage when downloading them concurrently on Windows #3880 (@slonopotamus) +* Fix error strings to follow Go guidelines #3871 (@slonopotamus) +* Miscellaneous release fixes #3866 (@bk2204) + +## 2.9.2 (12 December 2019) + +This release fixes a few regressions, such as a possible nil pointer +dereference, a failure to retry batch requests, and a bug where repositories +could fail to be detected on Windows. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @exceed-alae for fixing a possible nil pointer dereference + +### Bugs + +* Add nil-check on defer block of DoTransfer() #3936 (@exceed-alae) +* Retry batch failures #3930 (@bk2204) +* rpm: use old setup code on CentOS 7 #3938 (@bk2204) +* Handle missing cygpath gracefully #3910 (@bk2204) + +### Misc + +* Don't abort with newer Git when in a bare repo #3940 (@bk2204) +* Fix more Linux package issues #3932 (@bk2204) + +## 2.9.1 (25 November 2019) + +This release fixes a few regressions, such as the ability to use HTTP/1.1 when +required, addresses a race condition, and switches the cookie jar parser to +something that's easier for distributions to package. + +We would like to extend a special thanks to the following open-source +contributors: + +* @nataliechen1 for fixing a data race +* @ssgelm for writing and updating the code to use a new cookie jar parser + +### Features + +* Allow specifying HTTP version to use #3887 (@bk2204) + +### Bugs + +* synchronize access to netrcCredentialHelper.skip #3896 (@nataliechen1) +* Fix several causes of CI problems #3878 (@bk2204) +* Miscellaneous release fixes #3866 (@bk2204) + +### Misc + +* Build artifacts during CI for easier testing #3892 (@bk2204) +* Use different parser for cookiejar files #3886 (@ssgelm) + +## 2.9.0 (17 October 2019) + +This release adds support for DragonFly BSD, adds a new `git lfs dedup` command +to save space if the file system supports it, adds support for file URLs, +improves the performance when walking the repository, contains improvements +to use HTTP/2 when available and cookies when required, and numerous other bug +fixes, features, and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @pluehne for adding support for fetching the history of specific refs +* @kupson for adding cookie support +* @liweitianux for adding Dragonfly BSD support +* @kazuki-ma for implementing de-duplication support +* @dvdveer for adding range support to ls-files +* @dyrone, @pmeerw, @yamiacat, and @kittenking for cleaning up some documentation issues +* @slonopotamus for improving concurrent downloads +* @nataliechen1 for fixing remote names with dots +* @jw3 for removing excessive logging +* @SeamusConnor for significantly improving performance when walking the repository + +### Features + +* Support fetching entire history of specific refs #3849 (@pluehne) +* Add support for CentOS 8 #3854 (@bk2204) +* Let git-lfs HTTPS transport send cookies #3825 (@kupson) +* Support DragonFly BSD #3837 (@liweitianux) +* HTTP/2 protocol support #3793 (@PastelMobileSuit) +* Add clonefile on Windows over ReFS support. #3790 (@kazuki-ma) +* Add new command `git lfs dedup` for file system level de-duplication. #3753 (@kazuki-ma) +* Support GIT_ALTERNATE_OBJECT_DIRECTORIES #3765 (@bk2204) +* ls-files: add support for reference range #3764 (@dvdveer) +* Add several additional distros for packagecloud.io #3751 (@bk2204) +* Provide an option to track to handle paths literally #3756 (@bk2204) +* Optimize traversal of Git objects with URL remotes #3755 (@bk2204) +* Support for file URLs #3748 (@bk2204) +* Add clone file on MacOS X (darwin). 
#3745 (@kazuki-ma) + +### Bugs + +* Fix JSON comma problems in docs #3851 (@dyrone) +* Remove redundant comma in batch.md #3841 (@dyrone) +* More robust handling of parallel attempts to download the same file #3826 (@slonopotamus) +* Update wildmatch to v1.0.4 #3820 (@bk2204) +* Update to gitobj v1.4.1 #3815 (@bk2204) +* Fix build error when cross-compiling #3817 (@bk2204) +* Do not fail when multiple processes download the same lfs file #3813 (@slonopotamus) +* Fix Remote Name Parsing Bug #3812 (@nataliechen1) +* status: gracefully handle files replaced by directories #3768 (@bk2204) +* Avoid deadlock when transfer queue fails #3800 (@bk2204) +* Avoid a hang when Git is slow to provide us data #3806 (@bk2204) +* tasklog/log.go: print "done" messages with a trailing period #3789 (@ttaylorr) +* track: make --filename work with spaces #3785 (@bk2204) +* Fix couple of 'the the' typos #3786 (@pmeerw) +* Use an absolute path for smudging #3780 (@bk2204) +* Fix URL parsing with Go 1.12.8 #3771 (@bk2204) +* Fix remote autoselection when not on a branch #3759 (@bk2204) +* Replace deprecated SEEK_SET, SEEK_CUR usage. #3739 (@kazuki-ma) +* Do not log skipped checkouts to file #3736 (@jw3) +* Fix typos across git-lfs repository #3728 (@kittenking) +* Accept legacy Git SSH URLs #3713 (@bk2204) + +### Misc + +* ls-files --all man patch #3859 (@yamiacat) +* Reworked to use git ls-files in some circumstances instead of FastWalkGitRepo #3823 (@SeamusConnor) +* Clean up go.mod for Go 1.13 #3807 (@bk2204) +* Use FICLONE instead of BTRFS_IOC_CLONE. #3796 (@kazuki-ma) +* Remove unused pty code #3737 (@bk2204) + +## 2.8.0 (23 July 2019) + +This release adds support for SOCKS proxies and Windows junctions, adds native +packages for Debian 10 and similar distributions, reduces the number of +situations in which running `git lfs fetch --all` is required, improves +compatibility with Cygwin, and numerous other bug fixes and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @mstrap for adding support for listing lock owners +* @h-hirokawa for adding support for rewriting object URLs +* @slonopotamus for helping get our specs and implementation in sync +* @ssgelm for improving our Debian packaging +* @TBK for fixing a test +* @hartzell for improving the compatibility of our Makefile +* @AJH16 for implementing support for NTLM SSO + +### Features + +* Don't fail if we lack objects the server has #3634 (@bk2204) +* Add support for Debian 10 #3697 (@bk2204) +* Migrate tags pointing to other tags #3690 (@bk2204) +* Add support for SOCKS proxies #3677 (@bk2204) +* Allow vendors to customize the version info #3636 (@bk2204) +* Wrap credential data to allow late credential prompting and update NTLM/SSPI to attempt SSPI login prior to prompting for credentials. 
#3617 (@AJH16) +* git-lfs locks should optionally denote own locks #3569 (@mstrap) +* tq/adapterbase: support rewriting href #3590 (@h-hirokawa) +* Handle Windows junctions properly #3560 (@bk2204) +* Allow specifying multiple insteadOf aliases #3550 (@bk2204) + +### Bugs + +* Make API documentation lock creation example less confusing #3648 (@bk2204) +* Use a download token when searching locks #3715 (@bk2204) +* Copy mode from original file when rewriting objects #3694 (@bk2204) +* Don't recurse into submodules when walking repository #3686 (@bk2204) +* Be more precise about what timestamps we accept #3680 (@bk2204) +* Canonicalize common directory paths on Cygwin #3671 (@bk2204) +* Ensure we always use correct Content-Type and Accept headers #3663 (@bk2204) +* Fix 'owner' lock field not documented as optional #3651 (@slonopotamus) +* Improve error handling in git lfs install #3624 (@bk2204) +* Properly handle config options for URLs with upper case letters #3584 (@bk2204) +* Detect Cygwin-style pipe TTYs as TTYs #3582 (@bk2204) +* Set push remote for pre-push #3579 (@bk2204) +* Switch from manually running go generate to using dh-golang to run it #3549 (@ssgelm) +* Install worktree hooks in the proper directory #3547 (@bk2204) +* Avoid nil pointer dereference on download failure #3537 (@bk2204) +* Avoid nil pointer dereference on unexpected failure #3534 (@bk2204) + +### Misc + +* Update gitobj to v1.3.1 #3716 (@bk2204) +* Use default line endings for core.autocrlf=input #3712 (@bk2204) +* Fix CircleCI #3710 (@bk2204) +* Vendor in gitobj v1.3.0 #3685 (@bk2204) +* Update CONTRIBUTING #3673 (@bk2204) +* Fix typo in t-locks.sh #3666 (@TBK) +* Make 'locked_at' required in JSON schema #3655 (@slonopotamus) +* Document a new batch error code #3639 (@bk2204) +* Fix invalid JSON in LFS locking API documentation #3650 (@slonopotamus) +* Fix invalid JSON in locking protocol docs #3644 (@slonopotamus) +* Various release updates #3623 (@bk2204) +* tq/adapterbase: fix typo enableHrefRerite to enableHrefRewrite #3594 (@h-hirokawa) +* Use git-lfs version of go-ntlm #3588 (@bk2204) +* Log test server standard error to log file #3577 (@bk2204) +* Don't set -extldflags unless LDFLAGS has a value #3545 (@hartzell) +* Retry on oversize file #3518 (@bk2204) +* Fix asset uploading during releases #3538 (@bk2204) + +## 2.7.0 (15 February 2019) + +This release adds better support for large files on 32-bit systems, adds +attribute macros, fixes several file descriptor leaks, improves compatibility +with Git's configuration parsing, and includes numerous other bug fixes and +modifications. 
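+
+As a rough illustration of the attribute macro support mentioned above (a
+minimal sketch only; the macro name `lfs` and the `*.psd` pattern are merely
+examples), a top-level `.gitattributes` file can define a macro attribute that
+Git LFS can now take into account when deciding which paths it treats as
+tracked:
+
+```
+$ cat .gitattributes
+[attr]lfs filter=lfs diff=lfs merge=lfs -text
+*.psd lfs
+```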
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @andyneff and @torbjoernk for updating our release targets +* @zkry for work on rate-limiting +* @Foxboron for work on reproducible builds +* @mstrap for adding a release target for Linux arm64 +* @keiko713, @Erwyn, and @mloskot for improving our documentation +* @QuLogic for fixing our tests under SELinux +* @saracen and @steffengodskesen for improving our output handling +* @mbsulliv for finding and fixing a bug where we ran out of file descriptors + +### Features + +* Add sles 15 support #1055 #3515 (@andyneff) +* docs/man/git-lfs-config.5.ronn: document GIT_LFS_SKIP_SMUDGE #3509 (@ttaylorr) +* commands/command_pointer.go: introduce `--check` option #3501 (@ttaylorr) +* Makefile additions for reproducible builds and asmflags #3444 (@Foxboron) +* locking: add flag to control modification of ignored files #3409 (@bk2204) +* build package for Ubuntu 18.10 aka Cosmic #3402 (@torbjoernk) +* Add support for retries with delays (ex. rate limiting) #3449 (@zkry) +* Trim embedded paths out of binaries #3443 (@bk2204) +* Ensure 32-bit Git LFS binaries can handle files larger than 4 GiB #3426 (@bk2204) +* Support attribute macros #3391 (@bk2204) +* tasklog: don't log progress status when stdout is not a tty #3349 (@steffengodskesen) +* locking: cache JSON response from server #3253 (@mstrap) +* tq: enable transfer debugging when GIT_CURL_VERBOSE is set #3341 (@bk2204) + +### Bugs + +* .circleci: don't use 'brew prune' #3514 (@ttaylorr) +* t/t-smudge.sh: remove unnecessary test #3513 (@ttaylorr) +* docs/man: fix inconsistency in 'git-lfs-ls-files(1)' #3496 (@ttaylorr) +* lfshttp: close body on redirect #3479 (@bk2204) +* status: handle deleted files gracefully #3482 (@bk2204) +* Fix hang in prune with too few file descriptors #3460 (@bk2204) +* Fix parameter name on List Locks API Documentation #3477 (@Erwyn) +* TST: Trim security context when checking permissions. 
#3476 (@QuLogic) +* command/env: ensure we honor lfs.url #3470 (@bk2204) +* Fix swapped case sensitivity in patterns #3433 (@bk2204) +* core.sharedRepository improvements for directories #3417 (@bk2204) +* Update the doc of whitelisted .lfsconfig keys #3423 (@keiko713) +* Rewrite URL config-matching #3392 (@PastelMobileSuit) +* git: close blob objects when finished #3379 (@bk2204) +* Avoid hang in repos cloned with --shared or --reference #3383 (@bk2204) +* commands/command_status.go: require a working copy #3378 (@ttaylorr) +* Fix test server API #3377 (@bk2204) +* vendor: don't remove necessary dependencies #3356 (@ttaylorr) +* filepathfilter: don't say file is both accepted and rejected #3360 (@bk2204) +* Support pushInsteadOf aliases when determining endpoints #3353 (@bk2204) +* Close attributes file #3347 (@mbsulliv) +* Fix humanize's FormatByteRate() to work with 0s duration #3340 (@saracen) + +### Misc + +* Release automation #3510 (@bk2204) +* docs/man: update `git-lfs-fetch(1)` manpage #3488 (@ttaylorr) +* Update Cobra #3483 (@bk2204) +* Run go generate only on Windows #3480 (@bk2204) +* docs/man/git-lfs-migrate: make examples less confusing #3424 (@bk2204) +* Modify logic of 'migrate info' to process extensionless files #3458 (@zkry) +* Improve error message on missing object #3398 (@bk2204) +* docs/man: suggest using Git configuration for LFS keys #3394 (@bk2204) +* Document default value of migrate info --top= #3387 (@mloskot) +* Clarify minimum git version #3327 (@carlwgeorge) + +## 2.6.1 (3 December 2018) + +This release contains miscellaneous bug fixes since v2.6.0. Most notably, +release v2.6.1 restores support for alternate repositories, which was +accidentally broken in v2.6.0. + +### Bugs + +* git: close blob objects when finished #3379 (@bk2204) +* Avoid hang in repos cloned with --shared or --reference #3383 (@bk2204) +* vendor: don't remove necessary dependencies #3356 (@ttaylorr) + +## 2.6.0 (1 November, 2018) + +This release adds better support for redirecting network calls from a Git LFS +API server to one that requires a different authentication mode, builds Git LFS +on Go 1.11, and numerous other bug fixes and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @andyneff for updating our release targets +* @gtsiolis: for removing the deprecated `git lfs clone` from the listing of + supported Git LFS commands +* @jsantell for fixing a formatting issue in the INCLUDE AND EXCLUDE man page + section +* @mmlb for adding a release target for Linux arm64 +* @skashyap7 for adding the 'git lfs track -n' +* @Villemoes: for modernizing the Git LFS installation procedure on Debian. 
+ +### Features + +* commands: list explicitly excluded patterns separately #3320 (@bk2204) +* Uninstall improvements #3326 (@bk2204) +* config: honor GIT_AUTHOR_DATE and GIT_COMMITTER_DATE #3314 (@bk2204) +* Add new `.netrc` credential helper #3307 (@PastelMobileSuit) +* Honor umask and core.sharedRepository #3304 (@bk2204) +* Support listing only filename tracked by git lfs using --name (-n) option + #3271 (@skashyap7) +* all: use Go 1.11.1 in CI #3298 (@ttaylorr) +* lfsapi/tq: Have DoWithAuth() caller determine URL Access Mode #3293 + (@PastelMobileSuit) +* commands: undeprecate checkout #3303 (@bk2204) +* Checkout options for conflicts #3296 (@bk2204) +* Makefile: build source tarballs for release #3283 (@bk2204) +* Encrypted SSL key support #3270 (@bk2204) +* Add support for core.sshCommand #3235 (@bk2204) +* gitobj-based Object Scanner #3236 (@bk2204) +* README.md: new core team members #3217 (@ttaylorr) +* Add build and releases for linux arm64 #3196 (@mmlb) +* Update packagecloud.rb #3210 (@andyneff) +* all: use Go modules instead of Glide #3208 (@ttaylorr) +* all: use Go 1.11 in CI #3203 (@ttaylorr) + +### Bugs + +* Fix formatting of INCLUDE AND EXCLUDE (REFS) #3330 (@jsantell) +* go.sum: add missing entries #3319 (@bk2204) +* Ensure correct syntax for commit headers in lfs migrate import #3313 (@bk2204) +* Clean up trailing whitespace #3299 (@bk2204) +* commands: unambiguously resolve remote references #3285 (@ttaylorr) +* Expand custom transfer args by using the shell #3259 (@bk2204) +* Canonicalize paths properly on Windows #3277 (@bk2204) +* debian/prerm: add --system flag #3272 (@Villemoes) +* t: make testsuite run under git rebase -x #3262 (@bk2204) +* git/gitattr: parse 'set' attributes #3255 (@ttaylorr) +* t: avoid panic in lfstest-customadapter #3243 (@bk2204) +* t: avoid using shell variables in printf's first argument #3242 (@bk2204) +* lfsapi: handle SSH hostnames and aliases without users #3230 (@bk2204) +* commands/command_ls_files.go: ignore index with argument #3219 (@ttaylorr) +* commands/command_migrate_import.go: install hooks #3227 (@ttaylorr) +* t: mark test sources as .PHONY #3228 (@ttaylorr) +* Pass GIT_SSH_COMMAND to the shell #3199 (@bk2204) +* Tidy misformatted files #3202 (@bk2204) +* config: expand core.hooksPath #3212 (@ttaylorr) +* locks: manage write permissions of ignored files #3190 (@ttaylorr) + +### Misc + +* CONTRIBUTING.md: :nail_care: #3325 (@ttaylorr) +* Update CONTRIBUTING #3317 (@bk2204) +* go.mod: depend on tagged gitobj #3311 (@ttaylorr) +* RFC: SSH protocol #3290 (@bk2204) +* Remove `git lfs clone` command from man #3301 (@gtsiolis) +* ROADMAP.md: use GitHub issues instead #3286 (@ttaylorr) +* docs: add note about closing release milestone #3274 (@bk2204) +* CI improvements #3268 (@bk2204) +* docs/howto: document our release process #3261 (@ttaylorr) +* Create new lfshttp package #3244 (@PastelMobileSuit) +* CONTRIBUTING: update required go version #3232 (@PastelMobileSuit) +* go.mod: use latest github.com/olekukonko/ts #3223 (@ttaylorr) +* go.mod: pin github.com/git-lfs/wildmatch to v1.0.0 #3218 (@ttaylorr) +* Update README.md #3193 (@srl295) + +## 2.5.2 (17 September, 2018) + +### Bugs + +* config: Treat [host:port]:path URLs correctly #3226 (@saschpe) +* tq: Always provide a Content-Type when uploading files #3201 (@bk2204) +* commands/track: Properly `lfs track` files with escaped characters in their + name #3192 (@leonid-s-usov) + +### Misc + +* packagecloud.rb: remove older versions #3210 (@andyneff) + +## 2.5.1 (2 August, 2018) + 
+This release contains miscellaneous bug fixes since v2.5.0. Most notably, +release v2.5.1 allows a user to disable automatic Content-Type detection +(released in v2.5.0) via `git config lfs.contenttype false` for hosts that do +not support it. + +### Features + +* tq: make Content-Type detection disable-able #3163 (@ttaylorr) + +### Bugs + +* Makefile: add explicit rule for commands/mancontent_gen.go #3160 (@jj1bdx) +* script/install.sh: mark as executable #3155 (@ttaylorr) +* config: add origin to remote list #3152 (@PastelMobileSuit) + +### Misc + +* docs/man/mangen.go: don't show non-fatal output without --verbose #3168 + (@ttaylorr) +* LICENSE.md: update copyright year #3156 (@IMJ355) +* Makefile: silence some output #3164 (@ttaylorr) +* Makefile: list prerequisites for resource.syso #3153 (@ttaylorr) + +## 2.5.0 (26 July, 2018) + +This release adds three new migration modes, updated developer ergonomics, and +a handful of bug fixes to Git LFS. + +We would like to extend a special thanks to the following open-source +contributors: + +* @calavera for fixing a broken Go test and adding support for custom + Content-Type headers in #3137 and #3138. +* @cbuehlmann for adding support for encoded character names in filepaths via + #3093. +* @larsxschneider for changing the default value of lfs.allowincompletepush in + #3109. +* @NoEffex for supporting TTL in SSH-based authentication tokens via #2867. +* @ssgelm for adding 'go generate' to our Debian packages via #3083. + +### Features + +* Makefile: replace many scripts with make targets #3144 (@ttaylorr) +* {.travis,appveyor}.yml: upgrade to Go 1.10.3 #3146 (@ttaylorr) +* t: run tests using prove #3125 (@ttaylorr) +* commands/migrate: infer wildmatches with --fixup #3114 (@ttaylorr) +* Retry SSH resolution 5 times #2934 (@stanhu) +* Implement `migrate export` subcommand #3084 (@PastelMobileSuit) +* Add `--no-rewrite` flag to `migrate import` command #3029 (@PastelMobileSuit) + +### Bugs + +* t: fix contains_same_elements() fn #3145 (@PastelMobileSuit) +* commands: warn if working copy is dirty #3124 (@ttaylorr) +* Ensure provided remote takes precedence over configured pushRemote #3139 (@PastelMobileSuit) +* Fix proxy unit tests. #3138 (@calavera) +* commands/command_migrate.go: loosen meaning of '--everything' #3121 (@ttaylorr) +* lfsapi: don't query askpass for given creds #3126 (@PastelMobileSuit) +* config/git_fetcher.go: mark 'lfs.allowincompletepush' as safe #3113 (@ttaylorr) +* fs: support multiple object alternates #3116 (@ttaylorr) +* commands/checkout: checkout over read-only files #3120 (@ttaylorr) +* test/testhelpers.sh: look for 64 character SHA-256's #3119 (@ttaylorr) +* config/config.go: case-insensitive error search #3098 (@ttaylorr) +* Encoded characters in pathnames #3093 (@cbuehlmann) +* Support default TTL for authentication tokens acquired via SSH #2867 (@NoEffex) +* commands/status.go: relative paths outside of root #3080 (@ttaylorr) +* Run `go generate` on commands in deb build #3083 (@ssgelm) +* lfsapi: prefer proxying from gitconfig before environment #3062 (@ttaylorr) +* commands/track: respect global- and system-level gitattributes #3076 (@ttaylorr) +* git/git.go: pass --multiple to git-fetch(1) when appropriate #3063 (@ttaylorr) +* commands/checkout: fix inaccurate messaging #3055 (@ttaylorr) +* commands/migrate: do not migrate empty commits #3054 (@ttaylorr) +* git/odb: retain trailing newlines in commit messages #3053 (@ttaylorr) + +### Misc + +* Set original file content type on basic upload. 
#3137 (@calavera) +* README.md: Git for Windows ships LFS by default #3112 (@larsxschneider) +* change lfs.allowincompletepush default from true to false #3109 (@larsxschneider) +* *: replace git/odb with vendored copy #3108 (@ttaylorr) +* test/test-ls-files.sh: skip on CircleCI #3101 (@ttaylorr) +* lfsapi/ssh.go: use zero-value sentinels #3099 (@ttaylorr) +* README.md: add link to installation wiki page #3075 (@ttaylorr) +* docs/man/git-lfs.1.ronn: update casing and missing commands #3059 (@ttaylorr) +* commands/checkout: mark 'git lfs checkout' as deprecated #3056 (@ttaylorr) + +## 2.4.2 (28 May, 2018) + +### Bugs + +* lfsapi: re-authenticate HTTP redirects when needed #3028 (@ttaylorr) +* lfsapi: allow unknown keywords in netrc file(s) #3027 (@ttaylorr) + +## 2.4.1 (18 May, 2018) + +This release fixes a handful of bugs found and fixed since v2.4.0. In +particular, Git LFS no longer panic()'s after invalid API responses, can +correctly run 'fetch' on SHAs instead of references, migrates symbolic links +correctly, and avoids writing to `$HOME/.gitconfig` more than is necessary. + +We would like to extend a "thank you" to the following contributors for their +gracious patches: + +- @QuLogic fixed an issue with running tests that require credentials +- @patrickmarlier made it possible for 'git lfs migrate import' to work + correctly with symbolic links. +- @zackse fixed an inconsistency in `CONTRIBUTING.md` +- @zanglang fixed an inconsistency in `README.md` + +Git LFS would not be possible without generous contributions from the +open-source community. For these, and many more: thank you! + +### Features + +* script/packagecloud.rb: release on Ubuntu Bionic #2961 (@ttaylorr) + +### Bugs + +* lfsapi: canonicalize extra HTTP headers #3010 (@ttaylorr) +* commands/lock: follow symlinks before locking #2996 (@ttaylorr) +* lfs/attribute.go: remove default value from upgradeables #2994 (@ttaylorr) +* git: include SHA1 in ref-less revisions #2982 (@ttaylorr) +* Do not migrate the symlinks to LFS objects. #2983 (@patrickmarlier) +* commands/uninstall: do not log about global hooks with --local #2976 (@ttaylorr) +* commands/run.go: exit 127 on unknown sub-command #2969 (@ttaylorr) +* commands/{un,}track: perform "prefix-agnostic" comparisons #2955 (@ttaylorr) +* commands/migrate: escape paths before .gitattributes #2933 (@ttaylorr) +* commands/ls-files: do not accept '--all' after '--' #2932 (@ttaylorr) +* tq: prevent uint64 underflow with invalid API response #2902 (@ttaylorr) + +### Misc + +* test/test-env: skip comparing GIT_EXEC_PATH #3015 (@ttaylorr) +* remove reference to CLA from contributor's guide #2997 (@zackse) +* .gitattributes link is broken #2985 (@zanglang) +* commands: make --version a synonym for 'version' #2968, #3017 (@ttaylorr) +* test: ensure that git-mergetool(1) works with large files #2939 (@ttaylorr) +* README.md: note the correct PackageCloud URL #2960 (@ttaylorr) +* README.md: mention note about `git lfs track` retroactively #2948 (@ttaylorr) +* README.md: reorganize into Core Team, Alumni #2941 (@ttaylorr) +* README.md: :nail_care: #2942 (@ttaylorr) +* circle.yml: upgrade to 'version: 2' syntax #2928 (@ttaylorr) +* Use unique repo name for tests that require credentials. #2901 (@QuLogic) + +## 2.4.0 (2 March, 2018) + +This release introduces a rewrite of the underlying file matching engine, +expands the API to include relevant refspecs for individual requests, +standardizes the progress output among commands, and more. 
+
+Please note: in the next MAJOR release (v3.0.0) the semantic meaning behind
+`--include` and `--exclude` flags will change. Once the details of exactly
+which existing patterns will no longer function as they previously did are
+known, we will indicate them here. Any `--include` or `--exclude` patterns
+used in v2.3.0 or earlier are expected to work as before in this release.
+
+This release would not be possible without the open-source community.
+Specifically, we would like to thank:
+
+- @larsxschneider: for contributing fixes to the filter operation in `git lfs
+  fsck`, and `git lfs prune`, as well as the bug report leading to the
+  filepathfilter changes.
+- @yfronto: for adding new Linux release targets.
+- @stffabi: for adding support for NTLM with SSPI on Windows.
+- @jeffreydwalter: for fixing memory alignment issues with `sync/atomic` on
+  32-bit architectures.
+- @b4mboo: for adding an LFS configuration key to the list of safe
+  configuration options.
+
+Without the aforementioned individuals, this release would not have been
+possible. Thank you!
+
+### Features
+
+* __Support wildmatch-compliant options in `--include`, `--exclude`__
+  * filepathfilter: implement using wildmatch #2875 (@ttaylorr)
+  * test: add wildmatch migration tests #2888 (@larsxschneider, @ttaylorr)
+* __Expand the specification to include relevant refspecs__
+  * verify locks against each ref being pushed #2706 (@technoweenie)
+  * Batch send refspec take 2 #2809 (@technoweenie)
+  * Run 1 TransferQueue per uploaded ref #2806 (@technoweenie)
+  * Locks/verify: full refspec #2722 (@technoweenie)
+  * send remote refspec for the other lock commands #2773 (@technoweenie)
+* __Standardize progress meter output and implementation__
+  * tq: standardized progress meter formatting #2811 (@ttaylorr)
+  * commands/fetch: unify formatting #2758 (@ttaylorr)
+  * commands/prune: unify formatting #2757 (@ttaylorr)
+  * progress: use git/githistory/log package for formatting #2732 (@ttaylorr)
+  * progress: remove `*progress.Meter` #2762 (@ttaylorr)
+  * tasklog: teach `*Logger` how to enqueue new `*SimpleTask`'s #2767 (@ttaylorr)
+  * progress: remove spinner.go #2759 (@ttaylorr)
+* __Teach new flags, functionality to `git lfs ls-files`__
+  * commands: teach '--all' to `git lfs ls-files` #2796 (@ttaylorr)
+  * commands/ls-files: show cached, tree-less LFS objects #2795 (@ttaylorr)
+  * commands/ls-files: add --include, --exclude #2793 (@ttaylorr)
+  * commands/ls-files: add '--size' flag #2764 (@ttaylorr)
+* __Add new flags, functionality to `git lfs migrate`__
+  * commands/migrate: support '^'-prefix refspec in arguments #2785 (@ttaylorr)
+  * commands/migrate: add '--skip-fetch' for offline migrations #2738 (@ttaylorr)
+  * git: prefer sending revisions over STDIN than arguments #2739 (@ttaylorr)
+* __Release to new operating systems__
+  * release lfs for ubuntu/artful too #2704 (@technoweenie)
+  * Adding Mint Sylvia to packagecloud.rb script #2829 (@yfronto)
+* __New functionality in package `lfsapi`__
+  * NTLM authentication with SSPI on windows #2871 (@stffabi)
+  * lfsapi/auth: teach DoWithAuth to respect http.extraHeaders #2733 (@ttaylorr)
+  * add support for url-specific proxies #2651 (@technoweenie)
+* __Code cleanup in git.Config, package `localstorage`__
+  * Tracked remote #2700 (@technoweenie)
+  * Replace git.Config #2692 (@technoweenie)
+  * Replace localstorage #2689 (@technoweenie)
+  * Remove last global config #2687 (@technoweenie)
+  * Git config refactor #2676 (@technoweenie)
+
+### Bugs
+
+* all: fix 32-bit alignment issues
with `sync/atomic` #2883 (@ttaylorr) +* all: memory alignment issues on 32-bit systems. #2880 (@jeffreydwalter) +* command/migrate: don't migrate remote references in bare repositories #2769 (@ttaylorr) +* commands/ls-files: behave correctly before initial commit #2794 (@ttaylorr) +* commands/migrate: allow for ambiguous references in migrations #2734 (@ttaylorr) +* commands: fill in missing printf arg #2678 (@technoweenie) +* config: Add `lfs.locksverify` to safe keys. #2797 (@b4mboo) +* don't replace pointers with objects if clean filter is not configured #2626 (@technoweenie) +* fsck: attach a filter to exclude unfetched items from fsck #2847 (@larsxschneider) +* git/githistory: copy entries from cache, elsewhere #2884 (@ttaylorr) +* git/githistory: migrate annotated tags correctly #2780 (@ttaylorr) +* git/odb: don't print extra newline after commit message #2784 (@ttaylorr) +* git/odb: extract identifiers from commits verbatim #2751 (@wsprent) +* git/odb: implement parsing for annotated `*Tag`'s #2778 (@ttaylorr) +* git/odb: retain newlines when parsing commit messages #2786 (@ttaylorr) +* lfs: PointerScanner is nil after error, so don't close #2699 (@technoweenie) +* lfsapi: Cred helper improvements #2695 (@technoweenie) +* lfsapi: retry requests changing access from none IF Auth header is empty #2621 (@technoweenie) +* prune: always prune excluded paths #2851 (@larsxschneider) +* status: fix incorrect formatting with unpushed objects #2746 (@ttaylorr) +* tasklog: don't drop updates in PercentageTask #2755 (@ttaylorr) +* test: Fix integration test early exit #2735 (@technoweenie) +* test: generate random repo names with fs-safe characters #2698 (@technoweenie) + +### Misc + +* all: Nitpicks #2821 (@technoweenie) +* all: introduce package 'tlog' #2747 (@ttaylorr) +* all: remove CLA #2870 (@MikeMcQuaid) +* build: Specify the embedded Windows icon as part of versioninfo.json #2770 (@sschuberth) +* config,test: Testlib no global config #2709 (@mathstuf) +* config: add PushRemote() for checking `branch.*.pushRemote` and `remote.pushDefault` first #2715 (@technoweenie) +* docs: Added documentation for git-lfs-ls-files' `*/-` output. #2719 (@bilke) +* docs: Uninstall man page improvements #2730 (@dpursehouse) +* docs: Update usage info for post-checkout #2830 (@proinsias) +* docs: add 'git lfs prune' to main man page #2849 (@larsxschneider) +* docs: use consistent casing for Git #2850 (@larsxschneider) +* git/githistory: have `*RefUpdater` hold `*odb.ObjectDatabase` reference #2779 (@ttaylorr) +* progress: move CopyCallback (& related) to package 'tools' #2749 (@ttaylorr) +* progress: move `*progressLogger` implementation to package 'tools' #2750 (@ttaylorr) +* refspec docs #2820 (@technoweenie) +* script/test: run 'go tool vet' during testing #2788 (@ttaylorr) +* tasklog: introduce `*SimpleTask` #2756 (@ttaylorr) +* test: Ignore comment attr lines #2708 (@mathstuf) +* test: Wait longer for test lfs server to start. 
#2716 (@QuLogic) +* test: ensure commented attr lines are ignored #2736 (@ttaylorr) +* tools/humanize: add 'FormatByteRate' to format transfer speed #2810 (@ttaylorr) +* vendor: update 'xeipuuv/gojsonpointer' #2846 (@ttaylorr) + +## 2.3.4 (18 October, 2017) + +### Features + +* 'git lfs install' updates filters with 'skip-smudge' option #2673 (@technoweenie) + +### Bugs + +* FastWalkGitRepo: limit number of concurrent goroutines #2672 (@technoweenie) +* handle scenario where multiple configuration values exist in ~/.gitconfig #2659 (@shiftkey) + +## 2.3.3 (9 October, 2017) + +### Bugs + +* invoke lfs for 'git update-index', fixing 'status' issues #2647 (@technoweenie) +* cache http credential helper output by default #2648 (@technoweenie) + +## 2.3.2 (3 October, 2017) + +### Features + +* bump default activity timeout from 10s -> 30s #2632 (@technoweenie) + +### Bugs + +* ensure files are marked readonly after unlocking by ID #2642 (@technoweenie) +* add files to index with path relative to current dir #2641 (@technoweenie) +* better Netrc errors #2633 (@technoweenie) +* only use askpass if credential.helper is not configured #2637 (@technoweenie) +* convert backslash to slash when writing to .gitattributes #2625 (@technoweenie) + +### Misc + +* only copy req headers if there are git-configured extra headers #2622 (@technoweenie) +* update tracerx to add timestamps #2620 (@rubyist) + +## 2.3.1 (27 September, 2017) + +### Features + +* add support for SSH_ASKPASS #2609 (@technoweenie) +* `git lfs migrate --verbose` option #2610 (@technoweenie) +* Support standalone custom transfer based on API URL prefix match #2590 (@sprohaska) + +### Bugs + +* Improve invalid URL error messages #2614 (@technoweenie) +* Fix double counting progress bug #2608 (@technoweenie) +* trim whitespace from GIT_ASKPASS provided passwords #2607 (@technoweenie) +* remove mmap usage in Packfile reader #2600 (@technoweenie) +* `git lfs clone`: don't fetch for unborn repositories #2598 (@shiftkey) + +### Misc + +* Windows Installer fixes: + * Show proper icon in add/remove programs list #2585 (@shiftkey) + * Make the Inno Setup installer script explicitly check for the binaries #2588 (@sschuberth) + * Improve compile-win-installer-unsigned.bat a bit #2586 (@sschuberth) +* Update migrate docs example for multiple file types #2596 (@technoweenie) + +## 2.3.0 (14 September, 2017) + +Git LFS v2.3.0 includes performance optimizations for the `git-lfs-migrate(1)` +and `git-clone(1)` commands, new features, bug-fixes, and more. + +This release was made possible by contributors to Git LFS. Specifically: + +- @aleb: added support for "standalone" transfer agents, for using `rsync(1)` + and similar with Git LFS. +- @bozaro: added support for custom `.git/lfs/objects` directories via the + `lfs.storage` configuration option. +- @larsxschneider: fixed a recursive process leak when shelling out to Git, + added new features to `git lfs ls-files`, extra information in error + messages used for debugging, documentation changes and more. +- @mathstuf: contributed a documentation change clarifying LFS's handling of + empty pointer files. +- @rudineirk and @andyneff: updated our release process to build packages for + fedora/26. +- @ssgelm: ensured that LFS is able to be released on Ubuntu Universe. + +To everyone who has contributed to this or previous releases of Git LFS: Thank +you! 
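+
+As a small, illustrative example of the `lfs.storage` option mentioned above
+(the path shown is only a placeholder), LFS object storage can be redirected
+to a custom directory with ordinary Git configuration:
+
+```
+$ git config --local lfs.storage /mnt/shared/lfs-objects
+```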
+ +### Features + +* git/odb/pack: improve `git lfs migrate` performance + * git/odb/pack: introduce packed object reassembly #2550 #2551 #2552 #2553 #2554 (@ttaylorr) + * git/odb/pack: teach packfile index entry lookups #2420 #2421 #2422 #2423 #2437 #2441 #2461 (@ttaylorr) + * git/{odb,githistory}: don't write unchanged objects #2541 (@ttaylorr) +* commands: improve `git clone` performance with 'delay' capability #2511 #2469 #2468 #2471 #2467 #2476 #2483 (@ttaylorr) + * commands: mark `git lfs clone` as deprecated #2526 (@ttaylorr) +* commands: enable `lfs.allowincompletepush` by default #2574 (@technoweenie) +* commands: teach '--everything' to `git lfs migrate` #2558 (@ttaylorr) +* commands: teach `git lfs ls-files` a '--debug' option #2540 (@larsxschneider) +* commands,lfs: warn on 4gb size conversion during clean #2510 #2507 #2459 (@ttaylorr) +* lfsapi/creds: teach about GIT_ASKPASS and core.askpass #2500 #2578 (@ttaylorr) +* commands/status: indicate missing objects #2438 (@ttaylorr) +* Allow using custom transfer agents directly #2429 (@aleb) +* Add `lfs.storage` parameter for overriding LFS storage location #2023 (@bozaro) +* lfsapi: enable credential caching by default #2508 (@ttaylorr) +* commands/install: teach `--manual` to `git-lfs-install(1)` #2410 (@ttaylorr) + +### Bugs + +* migrate: fix migrations with subdirectories in '--include' or '--exclude' #2485 (@ttaylorr) +* commands/migrate: fix hardlinking issue when different filesystem is mounted at `/tmp` #2566 (@ttaylorr) +* commands: make `git lfs migrate` fetch ref updates before migrating #2538 (@ttaylorr) +* commands: remove '--above=1mb' default from `git lfs migrate info` #2460 (@ttaylorr) +* filepathfilter: fix `HasPrefix()` when no '--include' filters present #2579 (@technoweenie) +* git/githistory/log: fix race condition with `git/githistory/log` tests #2495 (@ttaylorr) +* git/odb: fix closing object database test #2457 (@ttaylorr) +* git/githistory: only update local refs after migrations #2559 (@ttaylorr) +* locking: fix unlocking files not removing write flag #2514 (@ttaylorr) +* locks: fix unlocking files in a symlinked directory #2505 (@ttaylorr) +* commands: teach `git lfs unlock` to ignore status errs in appropriate conditions #2475 (@ttaylorr) +* git: expand `GetAttributePaths` check to include non-LFS lockables #2528 (@ttaylorr) +* fix multiple `git update-index` invocations #2531 (@larsxschneider) +* tools: fix SSH credential cacher expiration #2530 (@ttaylorr) +* lfsapi: fix read/write race condition in credential cacher #2493 (@ttaylorr) +* lfs: fix cleaning contents larger than 1024 bytes over stdin #2488 (@ttaylorr) +* fsck only scans current version of objects #2049 (@TheJare) +* progress: fix writing updates to `$GIT_LFS_PROGRESS` #2465 (@ttaylorr) +* commands/track: resolve symlinks before comparing attr paths #2463 (@ttaylorr) +* test: ensure that empty pointers are empty #2458 (@ttaylorr) +* git/githistory/log: prevent 'NaN' showing up in `*PercentageTask` #2455 (@ttaylorr) +* tq: teach Batch() API to retry itself after io.EOF's #2516 (@ttaylorr) + +### Misc + +* script/packagecloud: release LFS on Fedora/26 #2443 #2509 (@rudineirk, @andyneff) +* git/githistory: change "Rewriting commits" when not updating refs #2577 (@ttaylorr) +* commands: print IP addresses in error logs #2570 (@larsxschneider) +* commands: print current time in UTC to error logs #2571 (@larsxschneider) +* commands: Disable lock verification when using a standalone custom-tr… #2499 (@aleb) +* docs/man: update `git lfs migrate` 
documentation with EXAMPLES #2580 (@technoweenie) +* docs/man: recommend global per-host locking config #2546 (@larsxschneider) +* commands: use transfer queue's batch size instead of constant #2529 (@ttaylorr) +* add function to invoke Git with disabled LFS filters #2453 (@larsxschneider) +* config: warn on unsafe keys in `.lfsconfig` #2502 (@ttaylorr) +* glide: remove unused dependencies #2501 (@ttaylorr) +* script/build: pass '-{ld,gc}flags' to compiler, if given #2462 (@ttaylorr) +* spec: mention that an empty file is its own LFS pointer #2449 (@mathstuf) +* Update to latest version of github.com/pkg/errors #2426 (@ssgelm) +* Update gitignore to add some temp files that get created when building debs #2425 (@ssgelm) +* lfs: indent contents of `git lfs install`, `update` #2392 (@ttaylorr) +* tq: increase default `lfs.concurrenttransfers` to 8 #2506 (@ttaylorr) + +## 2.2.1 (10 July, 2017) + +### Bugs + +* git lfs status --json only includes lfs files #2374 (@asottile) +* git/odb: remove temporary files after migration #2388 (@ttaylorr) +* git/githistory: fix hanging on empty set of commits #2383 (@ttaylorr) +* migrate: don't checkout HEAD on bare repositories #2389 (@ttaylorr) +* git/odb: prevent cross-volume link error when saving objects #2382 (@ttaylorr) +* commands: only pass --jobs to `git clone` if set #2369 (@technoweenie) + +### Misc + +* lfs: trace hook install, uninstall, upgrade #2393 (@ttaylorr) +* vendor: remove github.com/cheggaaa/pb #2386 (@ttaylorr) +* Use FormatBytes from git-lfs/tools/humanize instead of cheggaaa/pb #2377 (@ssgelm) + + +## 2.2.0 (27 June, 2017) + +Git LFS v2.2.0 includes bug fixes, minor features, and a brand new `migrate` +command. The `migrate` command rewrites commits, converting large files from +Git blobs to LFS objects. The most common use case will fix a git push rejected +for having large blobs: + +``` +$ git push origin master +# ... +remote: error: file a.psd is 1.2 gb; this exceeds github's file size limit of 100.00 mb +to github.com:ttaylorr/demo.git + ! [remote rejected] master -> master (pre-receive hook declined) +error: failed to push some refs to 'git@github.com:ttaylorr/demo.git' + +$ git lfs migrate info +*.psd 1.2 GB 27/27 files(s) 100% + +$ git lfs migrate import --include="*.psd" +migrate: Sorting commits: ..., done +migrate: Rewriting commits: 100% (810/810), done + master f18bb746d44e8ea5065fc779bb1acdf3cdae7ed8 -> 35b0fe0a7bf3ae6952ec9584895a7fb6ebcd498b +migrate: Updating refs: ..., done + +$ git push origin +Git LFS: (1 of 1 files) 1.2 GB / 1.2 GB +# ... +To github.com:ttaylorr/demo.git + * [new branch] master -> master +``` + +The `migrate` command has detailed options described in the `git-lfs-migrate(1)` +man page. Keep in mind that this is the first pass at such a command, so we +expect there to be bugs and performance issues (especially on long git histories). +Future updates to the command will be focused on improvements to allow full +LFS transitions on large repositories. 
+ +### Features + +* commands: add git-lfs-migrate(1) 'import' subcommand #2353 (@ttaylorr) +* commands: add git-lfs-migrate(1) 'info' subcommand #2313 (@ttaylorr) +* Implement status --json #2311 (@asottile) +* commands/uploader: allow incomplete pushes #2199 (@ttaylorr) + +### Bugs + +* Retry on timeout or temporary errors #2312 (@jakub-m) +* commands/uploader: don't verify locks if verification is disabled #2278 (@ttaylorr) +* Fix tools.TranslateCygwinPath() on MSYS #2277 (@raleksandar) +* commands/clone: add new flags since Git 2.9 #2251, #2252 (@ttaylorr) +* Make pull return non-zero error code when some downloads failed #2237 (@seth2810) +* tq/basic_download: guard against nil HTTP response #2227 (@ttaylorr) +* Bugfix: cannot push to scp style URL #2198 (@jiangxin) +* support lfs..* values where url does not include .git #2192 (@technoweenie) +* commands: fix logged error not interpolating format qualifiers #2228 (@ttaylorr) +* commands/help: print helptext to stdout for consistency with Git #2210 (@ttaylorr) + +### Misc + +* Minor cleanups in help index #2248 (@dpursehouse) +* Add git-lfs-lock and git-lfs-unlock to help index #2232 (@dpursehouse) +* packagecloud: add Debian 9 entry to formatted list #2211 (@ttaylorr) +* Update Xenial is to use stretch packages #2212 (@andyneff) + +## 2.1.1 (19 May, 2017) + +Git LFS v2.1.1 ships with bug fixes and a security patch fixing a remote code +execution vulnerability exploitable by setting a SSH remote via your +repository's `.lfsconfig` to contain the string "-oProxyCommand". This +vulnerability is only exploitable if an attacker has write access to your +repository, or you clone a repository with a `.lfsconfig` file containing that +string. + +### Bugs + +* Make pull return non-zero error code when some downloads failed #2245 (@seth2810, @technoweenie) +* lfsapi: support cross-scheme redirection #2243 (@ttaylorr) +* sanitize ssh options parsed from ssh:// url #2242 (@technoweenie) +* filepathfilter: interpret as .gitignore syntax #2238 (@technoweenie) +* tq/basic_download: guard against nil HTTP response #2229 (@ttaylorr) +* commands: fix logged error not interpolating format qualifiers #2230 (@ttaylorr) + +### Misc + +* release: backport Debian 9-related changes #2244 (@ssgelm, @andyneff, @ttaylorr) +* Add git-lfs-lock and git-lfs-unlock to help index #2240 (@dpursehouse) +* config: allow multiple environments when calling config.Unmarshal #2224 (@ttaylorr) + +## 2.1.0 (28 April, 2017) + +### Features + +* commands/track: teach --no-modify-attrs #2175 (@ttaylorr) +* commands/status: add blob info to each entry #2070 (@ttaylorr) +* lfsapi: improve HTTP request/response stats #2184 (@technoweenie) +* all: support URL-style configuration lookups (@ttaylorr) + * commands: support URL-style lookups for `lfs.{url}.locksverify` #2162 (@ttaylorr) + * lfsapi: support URL-style lookups for `lfs.{url}.access` #2161 (@ttaylorr) + * lfsapi/certs: use `*config.URLConfig` to do per-host config lookup #2160 (@ttaylorr) + * lfsapi: support for http..extraHeader #2159 (@ttaylorr) + * config: add prefix to URLConfig type #2158 (@ttaylorr) + * config: remove dependency on lfsapi package #2156 (@ttaylorr) + * config: support multi-value lookup on URLConfig #2154 (@ttaylorr) + * lfsapi: initial httpconfig type #1912 (@technoweenie, @ttaylorr) +* lfsapi,tq: relative expiration support #2130 (@ttaylorr) + +### Bugs + +* commands: include error in `LoggedError()` #2179 (@ttaylorr) +* commands: cross-platform log formatting to files #2178 (@ttaylorr) +* locks: 
cross-platform path normalization #2139 (@ttaylorr) +* commands,locking: don't disable locking for auth errors during verify #2110 (@ttaylorr) +* commands/status: show partially staged files twice #2067 (@ttaylorr) + +### Misc + +* all: build on Go 1.8.1 #2145 (@ttaylorr) +* Polish custom-transfers.md #2171 (@sprohaska) +* commands/push: Fix typo in comment #2170 (@sprohaska) +* config: support multi-valued config entries #2152 (@ttaylorr) +* smudge: use localstorage temp directory, not system #2140 (@ttaylorr) +* locking: send locks limit to server #2107 (@ttaylorr) +* lfs: extract `DiffIndexScanner` #2035 (@ttaylorr) +* status: use DiffIndexScanner to populate results #2042 (@ttaylorr) + +## 2.0.2 (29 March, 2017) + +### Features + +* ssh auth and credential helper caching #2094 (@ttaylorr) +* commands,tq: specialized logging for missing/corrupt objects #2085 (@ttaylorr) +* commands/clone: install repo-level hooks after `git lfs clone` #2074 +* (@ttaylorr) +* debian: Support building on armhf and arm64 #2089 (@p12tic) + +### Bugs + +* commands,locking: don't disable locking for auth errors during verify #2111 +* (@ttaylorr) +* commands: show real error while cleaning #2096 (@ttaylorr) +* lfsapi/auth: optionally prepend an empty scheme to Git remote URLs #2092 +* (@ttaylorr) +* tq/verify: authenticate verify requests if required #2084 (@ttaylorr) +* commands/{,un}track: correctly escape '#' and ' ' characters #2079 (@ttaylorr) +* tq: use initialized lfsapi.Client instances in transfer adapters #2048 +* (@ttaylorr) + +### Misc + +* locking: send locks limit to server #2109 (@ttaylorr) +* docs: update configuration documentation #2097 #2019 #2102 (@terrorobe) +* docs: update locking API documentation #2099 #2101 (@dpursehouse) +* fixed table markdown in README.md #2095 (@ZaninAndrea) +* remove the the duplicate work #2098 (@grimreaper) + +## 2.0.1 (6 March, 2017) + +### Misc + +* tq: fallback to `_links` if present #2007 (@ttaylorr) + +## 2.0.0 (1 March, 2017) + +Git LFS v2.0.0 brings a number of important bug fixes, some new features, and +a lot of internal refactoring. It also completely removes old APIs that were +deprecated in Git LFS v0.6. + +### Locking + +File Locking is a brand new feature that lets teams communicate when they are +working on files that are difficult to merge. Users are not able to edit or push +changes to any files that are locked by other users. While the feature has been +in discussion for a year, we are releasing a basic Locking implementation to +solicit feedback from the community. + +### Transfer Queue + +LFS 2.0 introduces a new Git Scanner, which walks a range of Git commits looking +for LFS objects to transfer. The Git Scanner is now asynchronous, initiating +large uploads or downloads in the Transfer Queue immediately once an LFS object +is found. Previously, the Transfer Queue waited until all of the Git commits +have been scanned before initiating the transfer. The Transfer Queue also +automatically retries failed uploads and downloads more often. + +### Deprecations + +Git LFS v2.0.0 also drops support for the legacy API in v0.5.0. If you're still +using LFS servers on the old API, you'll have to stick to v1.5.6. 
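+
+As a brief sketch of the basic File Locking workflow described above (the file
+path is illustrative only), locks are taken, listed, and released with the new
+commands:
+
+```
+$ git lfs lock images/banner.psd
+$ git lfs locks
+$ git lfs unlock images/banner.psd
+```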
+ +### Features + +* Mid-stage locking support #1769 (@sinbad) +* Define lockable files, make read-only in working copy #1870 (@sinbad) +* Check that files are not uncommitted before unlock #1896 (@sinbad) +* Fix `lfs unlock --force` on a missing file #1927 (@technoweenie) +* locking: teach pre-push hook to check for locks #1815 (@ttaylorr) +* locking: add `--json` flag #1814 (@ttaylorr) +* Implement local lock cache, support querying it #1760 (@sinbad) +* support for client certificates pt 2 #1893 (@technoweenie) +* Fix clash between progress meter and credential helper #1886 (@technoweenie) +* Teach uninstall cmd about --local and --system #1887 (@technoweenie) +* Add `--skip-repo` option to `git lfs install` & use in tests #1868 (@sinbad) +* commands: convert push, pre-push to use async gitscanner #1812 (@ttaylorr) +* tq: prioritize transferring retries before new items #1758 (@ttaylorr) + +### Bugs + +* ensure you're in the correct directory when installing #1793 (@technoweenie) +* locking: make API requests relative to repository, not root #1818 (@ttaylorr) +* Teach 'track' about CRLF #1914 (@technoweenie) +* Teach 'track' how to handle empty lines in .gitattributes #1921 (@technoweenie) +* Closing stdout pipe before function return #1861 (@monitorjbl) +* Custom transfer terminate #1847 (@sinbad) +* Fix Install in root problems #1727 (@technoweenie) +* cat-file batch: read all of the bytes #1680 (@technoweenie) +* Fixed file paths on cygwin. #1820, #1965 (@creste, @ttaylorr) +* tq: decrement uploaded bytes in basic_upload before retry #1958 (@ttaylorr) +* progress: fix never reading bytes with sufficiently small files #1955 (@ttaylorr) +* tools: fix truncating string fields between balanced quotes in GIT_SSH_COMMAND #1962 (@ttaylorr) +* commands/smudge: treat empty pointers as empty files #1954 (@ttaylorr) + +### Misc + +* all: build using Go 1.8 #1952 (@ttaylorr) +* Embed the version information into the Windows executable #1689 (@sschuberth) +* Add more meta-data to the Windows installer executable #1752 (@sschuberth) +* docs/api: object size must be positive #1779 (@ttaylorr) +* build: omit DWARF tables by default #1937 (@ttaylorr) +* Add test to prove set operator [] works in filter matching #1768 (@sinbad) +* test: add ntlm integration test #1840 (@technoweenie) +* lfs/tq: completely remove legacy support #1686 (@ttaylorr) +* remove deprecated features #1679 (@technoweenie) +* remove legacy api support #1629 (@technoweenie) + +## 1.5.6 (16 February, 2017) + +## Bugs + +* Spool malformed pointers to avoid deadlock #1932 (@ttaylorr) + +## 1.5.5 (12 January, 2017) + +### Bugs + +* lfs: only buffer first 1k when creating a CleanPointerError #1856 (@ttaylorr) + +## 1.5.4 (27 December, 2016) + +### Bugs + +* progress: guard negative padding width, panic in `strings.Repeat` #1807 (@ttaylorr) +* commands,lfs: handle malformed pointers #1805 (@ttaylorr) + +### Misc + +* script/packagecloud: release LFS on fedora/25 #1798 (@ttaylorr) +* backport filepathfilter to v1.5.x #1782 (@technoweenie) + +## 1.5.3 (5 December, 2016) + +### Bugs + +* Support LFS installations at filesystem root #1732 (@technoweenie) +* git: parse filter process header values containing '=' properly #1733 (@larsxschneider) +* Fix SSH endpoint parsing #1738 (@technoweenie) + +### Misc + +* build: release on Go 1.7.4 #1741 (@ttaylorr) + +## 1.5.2 (22 November, 2016) + +### Features + +* Release LFS on Fedora 24 #1685 (@technoweenie) + +### Bugs + +* filter-process: fix reading 1024 byte files #1708 (@ttaylorr) +* Support 
long paths on Windows #1705 (@technoweenie) + +### Misc + +* filter-process: exit with error if we detect an unknown command from Git #1707 (@ttaylorr) +* vendor: remove contentaddressable lib #1706 (@technoweenie) + +## 1.5.1 (18 November, 2016) + +### Bugs + +* cat-file --batch parser errors on non-lfs git blobs #1680 (@technoweenie) + +## 1.5.0 (17 November, 2016) + +### Features + +* Filter Protocol Support #1617 (@ttaylorr, @larsxschneider) +* Fast directory walk #1616 (@sinbad) +* Allow usage of proxies even when contacting localhost #1605 (@chalstrick) + +### Bugs + +* start reading off the Watch() channel before sending any input #1671 (@technoweenie) +* wait for remote ref commands to exit before returning #1656 (@jjgod, @technoweenie) + +### Misc + +* rewrite new catfilebatch implementation for upcoming gitscanner pkg #1650 (@technoweenie) +* refactor testutils.FileInput so it's a little more clear #1666 (@technoweenie) +* Update the lfs track docs #1642 (@technoweenie) +* Pre push tracing #1638 (@technoweenie) +* Remove `AllGitConfig()` #1634 (@technoweenie) +* README: set minimal required Git version to 1.8.5 #1636 (@larsxschneider) +* 'smudge --info' is deprecated in favor of 'ls-files' #1631 (@technoweenie) +* travis-ci: test GitLFS with ancient Git version #1626 (@larsxschneider) + +## 1.4.4 (24 October, 2016) + +### Bugs + +* transfer: more descriptive "expired at" errors #1603 (@ttaylorr) +* commands,lfs/tq: Only send unique OIDs to the Transfer Queue #1600 (@ttaylorr) +* Expose the result message in case of an SSH authentication error #1599 (@sschuberth) + +### Misc + +* AppVeyor: Do not build branches with open pull requests #1594 (@sschuberth) +* Update .mailmap #1593 (@dpursehouse) + +## 1.4.3 (17 October, 2016) + +### Bugs + +* lfs/tq: use extra arguments given to tracerx.Printf #1583 (@ttaylorr) +* api: correctly print legacy API warning to Stderr #1582 (@ttaylorr) + +### Misc + +* Test storage retries #1585 (@ttaylorr) +* Test legacy check retries behavior #1584 (@ttaylorr) +* docs: Fix a link to the legacy API #1579 (@sschuberth) +* Add a .mailmap file #1577 (@sschuberth) +* Add a large wizard image to the Windows installer #1575 (@sschuberth) +* Appveyor badge #1574 (@ttaylorr) + +## 1.4.2 (10 October, 2016) + +v1.4.2 brings a number of bug fixes and usability improvements to LFS. This +release also adds support for multiple retries within the transfer queue, making +transfers much more reliable. To enable this feature, see the documentation for +`lfs.transfer.maxretries` in `git-lfs-config(5)`. + +We'd also like to extend a special thank-you to @sschuberth who undertook the +process of making LFS's test run on Windows through AppVeyor. Now all pull +requests run tests on macOS, Linux, and Windows. 
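+
+For reference, the retry behavior mentioned above is tuned through the
+`lfs.transfer.maxretries` key; a minimal example (the value shown is
+arbitrary) is:
+
+```
+$ git config lfs.transfer.maxretries 10
+```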
+
+### Features
+
+* lfs: warn on usage of the legacy API #1564 (@ttaylorr)
+* use filepath.Clean() when comparing filenames to include/exclude patterns #1565 (@technoweenie)
+* lfs/transfer_queue: support multiple retries per object #1505, #1528, #1535, #1545 (@ttaylorr)
+* Automatically upgrade old filters instead of requiring --force #1497 (@sinbad)
+* Allow lfs.pushurl in .lfsconfig #1489 (@technoweenie)
+
+### Bugs
+
+* Use "sha256sum" on Windows #1566 (@sschuberth)
+* git: ignore non-root wildcards #1563 (@ttaylorr)
+* Teach status to recognize multiple files with identical contents #1550 (@ttaylorr)
+* Status initial commit #1540 (@sinbad)
+* Make path comparison robust against Windows short / long path issues #1523 (@sschuberth)
+* Allow fetch to run without a remote configured #1507 (@sschuberth)
+
+### Misc
+
+* travis: run tests on Go 1.7.1 #1568 (@ttaylorr)
+* Enable running tests on AppVeyor CI #1567 (@sschuberth)
+* Travis: Only install git if not installed yet #1557 (@sschuberth)
+* Windows test framework fixes #1522 (@sschuberth)
+* Simplify getting the absolute Git root directory #1518 (@sschuberth)
+* Add icons to the Windows installer #1504 (@sschuberth)
+* docs/man: reference git-lfs-pointer(1) in clean documentation #1503 (@ttaylorr)
+* Make AppVeyor CI for Windows work again #1506 (@sschuberth)
+* commands: try out RegisterCommand() #1495 (@technoweenie)
+
+## 1.4.1 (26 August, 2016)
+
+### Features
+
+* retry if file download failed #1454 (@larsxschneider)
+* Support wrapped clone in current directory #1478 (@ttaylorr)
+
+### Misc
+
+* Test `RetriableReader` #1482 (@ttaylorr)
+
+## 1.4.0 (19 August, 2016)
+
+### Features
+
+* Install LFS at the system level when packaged #1460 (@javabrett)
+* Fetch remote urls #1451 (@technoweenie)
+* add object Authenticated property #1452 (@technoweenie)
+* add support for `url.*.insteadof` in git config #1117, #1443 (@artagnon, @technoweenie)
+
+### Bugs
+
+* fix --include bug when multiple files have same lfs content #1458 (@technoweenie)
+* check the git version is ok in some key commands #1461 (@technoweenie)
+* fix duplicate error reporting #1445, #1453 (@dpursehouse, @technoweenie)
+* transfer/custom: encode "event" as lowercase #1441 (@ttaylorr)
+
+### Misc
+
+* docs/man: note GIT_LFS_PROGRESS #1469 (@ttaylorr)
+* Reword the description of HTTP 509 status #1467 (@dpursehouse)
+* Update fetch include/exclude docs for pattern matching #1455 (@ralfthewise)
+* config-next: API changes to the `config` package #1425 (@ttaylorr)
+* errors-next: Contextualize error messages #1463 (@ttaylorr, @technoweenie)
+* scope commands to not leak instances of themselves #1434 (@technoweenie)
+* Transfer manifest #1430 (@technoweenie)
+
+## 1.3.1 (2 August 2016)
+
+### Features
+
+* lfs/hook: teach `lfs.Hook` about `core.hooksPath` #1409 (@ttaylorr)
+
+### Bugs
+
+* distinguish between empty include/exclude paths #1411 (@technoweenie)
+* Fix sslCAInfo config lookup when host in config doesn't have a trailing slash #1404 (@dakotahawkins)
+
+### Misc
+
+* Use commands.Config instead of config.Config #1390 (@technoweenie)
+
+## 1.3.0 (21 July 2016)
+
+### Features
+
+* use proxy from git config #1173, #1358 (@jonmagic, @LizzHale, @technoweenie)
+* Enhanced upload/download of LFS content: #1265 #1279 #1297 #1303 #1367 (@sinbad)
+  * Resumable downloads using HTTP range headers
+  * Resumable uploads using [tus.io protocol](http://tus.io)
+  * Pluggable [custom transfer adapters](https://github.com/git-lfs/git-lfs/blob/master/docs/custom-transfers.md)
+* In git 2.9+, run "git lfs pull" in submodules after "git lfs clone" #1373 (@sinbad) +* cmd,doc,test: teach `git lfs track --{no-touch,verbose,dry-run}` #1344 (@ttaylorr) +* ⏳ Retry transfers with expired actions #1350 (@ttaylorr) +* Safe track patterns #1346 (@ttaylorr) +* Add checkout --unstaged flag #1262 (@orivej) +* cmd/clone: add include/exclude via flags and config #1321 (@ttaylorr) +* Improve progress reporting when files skipped #1296 (@sinbad) +* Experimental file locking commands #1236, #1259, #1256, #1386 (@ttaylorr) +* Implement support for GIT_SSH_COMMAND #1260 (@pdf) +* Recognize include/exclude filters from config #1257 (@ttaylorr) + +### Bugs + +* Fix bug in Windows installer under Win32. #1200 (@teo-tsirpanis) +* Updated request.GetAuthType to handle multi-value auth headers #1379 (@VladimirKhvostov) +* Windows fixes #1374 (@sinbad) +* Handle artifactory responses #1371 (@ttaylorr) +* use `git rev-list --stdin` instead of passing each remote ref #1359 (@technoweenie) +* docs/man: move "logs" subcommands from OPTIONS to COMMANDS #1335 (@ttaylorr) +* test/zero-len: update test for git v2.9.1 #1369 (@ttaylorr) +* Unbreak building httputil on OpenBSD #1360 (@jasperla) +* WIP transferqueue race fix #1255 (@technoweenie) +* Safety check to `comands.requireStdin` #1349 (@ttaylorr) +* Removed CentOS 5 from dockers. Fixed #1295. #1298 (@javabrett) +* Fix 'git lfs fetch' with a sha1 ref #1323 (@omonnier) +* Ignore HEAD ref when fetching with --all #1310 (@ttaylorr) +* Return a fully remote ref to reduce chances of ref clashes #1248 (@technoweenie) +* Fix reporting of `git update-index` errors in `git lfs checkout` and `git lfs pull` #1400 (@technoweenie) + +### Misc + +* Added Linux Mint Sarah to package cloud script #1384 (@andyneff) +* travis-ci: require successful tests against upcoming Git core release #1372 (@larsxschneider) +* travis-ci: add a build job to test against upcoming versions of Git #1361 (@larsxschneider) +* Create Makefiles for building with gccgo #1222 (@zeldin) +* README: add @ttaylorr to core team #1332 (@ttaylorr) +* Enforced a minimum gem version of 1.0.4 for packagecloud-ruby #1292 (@javabrett) +* I think this should be "Once installed" and not "One installed", but … #1305 (@GabLeRoux) +* script/test: propagate extra args to go test #1324 (@omonnier) +* Add `lfs.basictransfersonly` option to disable non-basic transfer adapters #1299 (@sinbad) +* Debian build vendor test excludes #1291 (@javabrett) +* gitignore: ignore lfstest-\* files #1271 (@ttaylorr) +* Disable gojsonschema test, causes failures when firewalls block it #1274 (@sinbad) +* test: use noop credential helper for auth tests #1267 (@ttaylorr) +* get git tests passing when run outside of repository #1229 (@technoweenie) +* Package refactor no.1 #1226 (@sinbad) +* vendor: vendor dependencies in vendor/ using Glide #1243 (@ttaylorr) + +## 1.2.1 (2 June 2016) + +### Features + +* Add missing config details to `env` command #1217 (@sinbad) +* Allow smudge filter to return 0 on download failure #1213 (@sinbad) +* Add `git lfs update --manual` option & promote it on hook install fail #1182 (@sinbad) +* Pass `git lfs clone` flags through to `git clone` correctly, respect some options #1160 (@sinbad) + +### Bugs + +* Clean trailing `/` from include/exclude paths #1278 (@ttaylorr) +* Fix problems with user prompts in `git lfs clone` #1185 (@sinbad) +* Fix failure to return non-zero exit code when lfs install/update fails to install hooks #1178 (@sinbad) +* Fix missing man page #1149 (@javabrett) +* fix 
concurrent map read and map write #1179 (@technoweenie) + +### Misc + +* Allow additional fields on request & response schema #1276 (@sinbad) +* Fix installer error on win32. #1198 (@teo-tsirpanis) +* Applied same -ldflags -X name value -> name=value fix #1193 (@javabrett) +* add instructions to install from MacPorts #1186 (@skymoo) +* Add xenial repo #1170 (@graingert) + +## 1.2.0 (14 April 2016) + +### Features + +* netrc support #715 (@rubyist) +* `git lfs clone` command #988 (@sinbad) +* Support self-signed certs #1067 (@sinbad) +* Support sslverify option for specific hosts #1081 (@sinbad) +* Stop transferring duplicate objects on major push or fetch operations on multiple refs. #1128 (@technoweenie) +* Touch existing git tracked files when tracked in LFS so they are flagged as modified #1104 (@sinbad) +* Support for git reference clones #1007 (@jlehtnie) + +### Bugs + +* Fix clean/smudge filter string for files starting with - #1083 (@epriestley) +* Fix silent failure to push LFS objects when ref matches a filename in the working copy #1096 (@epriestley) +* Fix problems with using LFS in symlinked folders #818 (@sinbad) +* Fix git lfs push silently misbehaving on ambiguous refs; fail like git push instead #1118 (@sinbad) +* Whitelist `lfs.*.access` config in local ~/.lfsconfig #1122 (@rjbell4) +* Only write the encoded pointer information to Stdout #1105 (@sschuberth) +* Use hardcoded auth from remote or lfs config when accessing the storage api #1136 (@technoweenie, @jonmagic) +* SSH should be called more strictly with command as one argument #1134 (@sinbad) + +## 1.1.2 (1 March, 2016) + +* Fix Base64 issues with `?` #989 (@technoweenie) +* Fix zombie git proc issue #1012 (@rlaakkol) +* Fix problems with files containing unicode characters #1016 (@technoweenie) +* Fix panic in `git cat-file` parser #1006 (@technoweenie) +* Display error messages in non-fatal errors #1028 #1039 #1042 (@technoweenie) +* Fix concurrent map access in progress meter (@technoweenie) + +## 1.1.1 (4 February, 2016) + +### Features + +* Add copy-on-write support for Linux BTRFS filesystem #952 (@bozaro) +* convert `git://` remotes to LFS servers automatically #964 (@technoweenie) +* Fix `git lfs track` handling of absolute paths. 
#975 (@technoweenie) +* Allow tunable http client timeouts #977 (@technoweenie) + +### Bugs + +* Suppress git config warnings for non-LFS keys #861 (@technoweenie) +* Fix fallthrough when `git-lfs-authenticate` returns an error #909 (@sinbad) +* Fix progress bar issue #883 (@pokehanai) +* Support `remote.name.pushurl` config #949 (@sinbad) +* Fix handling of `GIT_DIR` and `GIT_WORK_TREE` #963, #971 (@technoweenie) +* Fix handling of zero length files #966 (@nathanhi) +* Guard against invalid remotes passed to `push` and `pre-push` #974 (@technoweenie) +* Fix race condition in `git lfs pull` #972 (@technoweenie) + +### Extra + +* Add server API test tool #868 (@sinbad) +* Redo windows installer with innosetup #875 (@strich) +* Pre-built packages are built with Go v1.5.3 + +## 1.1.0 (18 November, 2015) + +* NTLM auth support #820 (@WillHipschman, @technoweenie) +* Add `prune` command #742 (@sinbad) +* Use .lfsconfig instead of .gitconfig #837 (@technoweenie) +* Rename "init" command #838 (@technoweenie) +* Raise error if credentials are needed #842 (@technoweenie) +* Support git repos in symlinked directories #818 (@sinbad, @difro, @jiangxin) +* Fix "git lfs env" to show correct SSH remote info #828 (@jiangxin) + +## 1.0.2 (28 October, 2015) + +* Fix issue with 'git lfs smudge' and the batch API. #795 (@technoweenie) +* Fix race condition in the git scanning code. #801 (@technoweenie) + +## 1.0.1 (23 October, 2015) + +* Downcase git config keys (prevents Auth loop) #690 (@WillHipschman) +* Show more info for unexpected http responses #710 (@rubyist) +* Use separate stdout/stderr buffers for `git-lfs-authenticate` #718 (@bozaro) +* Use LoggedError instead of Panic if update-index fails in checkout #735 (@sinbad) +* `smudge` command exits with non-zero if the download fails #732 (@rubyist) +* Use `git rev-parse` to find the git working dir #692 (@sinbad) +* Improved default remote behaviour & validation for fetch/pull #713 (@sinbad) +* Make fetch return error code when 1+ downloads failed #734 (@sinbad) +* Improve lfs.InRepo() detection in `init`/`update` #756 (@technoweenie) +* Teach smudge to use the batch api #711 (@rubyist) +* Fix not setting global attribute when needed to b/c of local state #765 (@sinbad) +* Fix clone fail when fetch is excluded globally #770 (@sinbad) +* Fix for partial downloads problem #763 (@technoweenie) +* Get integration tests passing on Windows #771 (@sinbad) + +### Security + +* Whitelist the valid keys read from .gitconfig #760 (@technoweenie) + +This prevents unsafe git configuration values from being used by Git LFS. + +## v1.0 (1 October, 2015) + +* Manual reference is integrated into the "help" options #665 @sinbad +* Fix `ls-files` when run from an empty repository #668 @Aorjoa +* Support listing duplicate files in `ls-files` #681 @Aorjoa @technoweenie +* `update` and `init` commands can install the pre-push hook in bare repositories #671 @technoweenie +* Add `GIT_LFS_SKIP_SMUDGE` and `init --skip-smudge` #679 @technoweenie + +## v0.6.0 (10 September, 2015) + +This is the first release that uses the new Batch API by default, while still +falling back to the Legacy API automatically. Also, new fetch/checkout/push +commands have been added. + +Run `git lfs update` in any local repositories to make sure all config settings +are updated. + +### Fetch + +* Rename old `git lfs fetch` command to `git lfs pull`. #527 (@sinbad) +* Add `git lfs checkout` #527 #543 #551 #566 (@sinbad) +* Add `git lfs fetch` for _just_ downloading objects. 
#527 (@sinbad) + * Add `remote` arg, and default to tracking remote instead of "origin". #583 (@sinbad) + * Support fetching multiple refs #542 (@sinbad) + * Add `--include` and `--exclude` flag for `git lfs fetch` #573 (@sinbad) + * Add `--recent` flag for downloading recent files outside of the current + checkout. #610 (@sinbad) + * Add `--all` option for download all objects from the server. #633 (@sinbad) +* Fix error handling while `git update-index` is running. #570 (@rubyist) + +See [git-lfs-fetch(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-fetch.1.ronn), +[git-lfs-checkout(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-checkout.1.ronn), +and [git-lfs-pull(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-pull.1.ronn) + for details. + +### Push + +* Support pushing multiple branches in the pre-push hook. #635 (@sinbad) +* Fix pushing objects from a branch that's not HEAD. #608 (@sinbad) +* Check server for objects before failing push because local is missing. #581 (@sinbad) +* Filter out commits from remote refs when pushing. #578 (@billygor) +* Support pushing all objects to the server, regardless of the remote ref. #646 (@technoweenie) +* Fix case where pre-push git hook exits with 0. #582 (@sinbad) + +See [git-lfs-push(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-push.1.ronn) for details. + +### API Clients + +* Fix some race conditions in the Batch API client. #577 #637 (@sinbad, @rubyist) +* Support retries in the Batch API client. #595 (@rubyist) +* Fix hanging batch client in certain error conditions. #594 (@rubyist) +* Treat 401 responses as errors in the Legacy API client. #634 (@rubyist) +* Fix bug in the Legacy API client when the object already exists on the server. #572 (@billygor) + +### Credentials + +* Fix how git credentials are checked in certain edge cases. #611 #650 #652 (@technoweenie) +* Send URI user to git credentials. #626 (@sinbad) +* Support git credentials with useHttpPath enabled. #554 (@clareliguori) + +### Installation + +* Docker images and scripts for building and testing linux packages. #511 #526 #555 #603 (@andyneff, @ssgelm) +* Create Windows GUI installer. #642 (@technoweenie) +* Binary releases use Go 1.5, which includes fix for Authorization when the + request URL includes just the username. [golang/go#11399](https://github.com/golang/go/issues/11399) + +### Misc + +* Documented Git config values used by Git LFS in [git-lfs-config(5)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-config.5.ronn). #610 (@sinbad) +* Experimental support for Git worktrees (in Git 2.5+) #546 (@sinbad) +* Experimental extension support. #486 (@ryansimmen) + +## v0.5.4 (30 July, 2015) + +* Ensure `git lfs uninit` cleans your git config thoroughly. #530 (@technoweenie) +* Fix issue with asking `git-credentials` for auth details after getting them +from the SSH command. 
#534 (@technoweenie) + +## v0.5.3 (23 July, 2015) + +* `git lfs fetch` bugs #429 (@rubyist) +* Push can crash on 32 bit architectures #450 (@rubyist) +* Improved SSH support #404, #464 (@sinbad, @technoweenie) +* Support 307 redirects with relative url #442 (@sinbad) +* Fix `init` issues when upgrading #446 #451 #452 #465 (@technoweenie, @rubyist) +* Support chunked Transfer-Encoding #386 (@ryansimmen) +* Fix issue with pushing deleted objects #461 (@technoweenie) +* Teach `git lfs push` how to send specific objects #449 (@larsxschneider) +* Update error message when attempting to push objects that don't exist in `.git/lfs/objects` #447 (@technoweenie) +* Fix bug in HTTP client when response body is nil #472 #488 (@rubyist, @technoweenie) +* `-crlf` flag in gitattributes is deprecated #475 (@technoweenie) +* Improvements to the CentOS and Debian build and package scripts (@andyneff, @ssgelm) + +## v0.5.2 (19 June, 2015) + +* Add `git lfs fetch` command for downloading objects. #285 (@rubyist) +* Fix `git lfs track` issues when run outside of a git repository #312, #323 (@michael-k, @Aorjoa) +* Fix `git lfs track` for paths with spaces in them #327 (@technoweenie) +* Fix `git lfs track` by writing relative paths to .gitattributes #356 (@michael-k) +* Fix `git lfs untrack` so it doesn't remove entries incorrectly from .gitattributes #398 (@michael-k) +* Fix `git lfs clean` bug with zero length files #346 (@technoweenie) +* Add `git lfs fsck` #373 (@zeroshirts, @michael-k) +* The Git pre-push warns if Git LFS is not installed #339 (@rubyist) +* Fix Content-Type header sent by the HTTP client #329 (@joerg) +* Improve performance tracing while scanning refs #311 (@michael-k) +* Fix detection of LocalGitDir and LocalWorkingDir #312 #354 #361 (@michael-k) +* Fix inconsistent file mode bits for directories created by Git LFS #364 (@michael-k) +* Optimize shell execs #377, #382, #391 (@bozaro) +* Collect HTTP transfer stats #366, #400 (@rubyist) +* Support GIT_DIR and GIT_WORK_TREE #370 (@michael-k) +* Hide Git application window in Windows #381 (@bozaro) +* Add support for configured URLs containing credentials per RFC1738 #408 (@ewbankkit, @technoweenie) +* Add experimental support for batch API calls #285 (@rubyist) +* Improve linux build instructions for CentOS and Debian. #299 #309 #313 #332 (@jsh, @ssgelm, @andyneff) + +## v0.5.1 (30 April, 2015) + +* Fix Windows install.bat script. #223 (@PeterDaveHello) +* Fix bug where `git lfs clean` will clean Git LFS pointers too #271 (@technoweenie) +* Better timeouts for the HTTP client #215 (@Mistobaan) +* Concurrent uploads through `git lfs push` #258 (@rubyist) +* Fix `git lfs smudge` behavior with zero-length file in `.git/lfs/objects` #267 (@technoweenie) +* Separate out pre-push hook behavior from `git lfs push` #263 (@technoweenie) +* Add diff/merge properties to .gitattributes #265 (@technoweenie) +* Respect `GIT_TERMINAL_PROMPT ` #257 (@technoweenie) +* Fix CLI progress bar output #185 (@technoweenie) +* Fail fast in `clean` and `smudge` commands when run without STDIN #264 (@technoweenie) +* Fix shell quoting in pre-push hook. #235 (@mhagger) +* Fix progress bar output during file uploads. #185 (@technoweenie) +* Change `remote.{name}.lfs_url` to `remote.{name}.lfsurl` #237 (@technoweenie) +* Swap `git config` order. 
#245 (@technoweenie) +* New `git lfs pointer` command for generating and comparing pointers #246 (@technoweenie) +* Follow optional "href" property from git-lfs-authenticate SSH command #247 (@technoweenie) +* `.git/lfs/objects` spec clarifications: #212 (@rtyley), #244 (@technoweenie) +* man page updates: #228 (@mhagger) +* pointer spec clarifications: #246 (@technoweenie) +* Code comments for the untrack command: #225 (@thekafkaf) + +## v0.5.0 (10 April, 2015) + +* Initial public release diff --git a/git-lfs-3.2.0/README.md b/git-lfs-3.2.0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..da34c6264dcf52995e5a1b6f49a34bb6c0cfdc85 --- /dev/null +++ b/git-lfs-3.2.0/README.md @@ -0,0 +1,259 @@ +# Git Large File Storage + +[![CI status][ci_badge]][ci_url] + +[ci_badge]: https://github.com/git-lfs/git-lfs/workflows/CI/badge.svg +[ci_url]: https://github.com/git-lfs/git-lfs/actions?query=workflow%3ACI + +[Git LFS](https://git-lfs.github.com) is a command line extension and +[specification](docs/spec.md) for managing large files with Git. + +The client is written in Go, with pre-compiled binaries available for Mac, +Windows, Linux, and FreeBSD. Check out the [website](http://git-lfs.github.com) +for an overview of features. + +## Getting Started + +### Downloading + +You can install the Git LFS client in several different ways, depending on your +setup and preferences. + +* **Linux users**. Debian and RPM packages are available from + [PackageCloud](https://packagecloud.io/github/git-lfs/install). +* **macOS users**. [Homebrew](https://brew.sh) bottles are distributed, and can + be installed via `brew install git-lfs`. +* **Windows users**. Git LFS is included in the distribution of + [Git for Windows](https://gitforwindows.org/). Alternatively, you can + install a recent version of Git LFS from the [Chocolatey](https://chocolatey.org/) package manager. +* **Binary packages**. In addition, [binary packages](https://github.com/git-lfs/git-lfs/releases) are +available for Linux, macOS, Windows, and FreeBSD. +* **Building from source**. [This repository](https://github.com/git-lfs/git-lfs.git) can also be +built from source using the latest version of [Go](https://golang.org), and the +available instructions in our +[Wiki](https://github.com/git-lfs/git-lfs/wiki/Installation#source). + +Note that Debian and RPM packages are built for all OSes for amd64 and i386. +For arm64, only Debian packages for the latest Debian release are built due to the cost of building in emulation. + +### Installing + +#### From binary + +The [binary packages](https://github.com/git-lfs/git-lfs/releases) include a script which will: + +- Install Git LFS binaries onto the system `$PATH` +- Run `git lfs install` to +perform required global configuration changes. + +```ShellSession +$ ./install.sh +``` + +#### From source + +- Ensure you have the latest version of Go, GNU make, and a standard Unix-compatible build environment installed. +- On Windows, install `goversioninfo` with `go install github.com/josephspurrier/goversioninfo/cmd/goversioninfo@latest`. +- Run `make`. +- Place the `git-lfs` binary, which can be found in `bin`, on your system’s executable `$PATH` or equivalent. +- Git LFS requires global configuration changes once per-machine. This can be done by +running: + +```ShellSession +$ git lfs install +``` + +#### Verifying releases + +Releases are signed with the OpenPGP key of one of the core team members. 
To +get these keys, you can run the following command, which will print them to +standard output: + +```ShellSession +$ curl -L https://api.github.com/repos/git-lfs/git-lfs/tarball/core-gpg-keys | tar -Ozxf - +``` + +Once you have the keys, you can download the `sha256sums.asc` file and verify +the file you want like so: + +```ShellSession +$ gpg -d sha256sums.asc | grep git-lfs-linux-amd64-v2.10.0.tar.gz | shasum -a 256 -c +``` + +For the convenience of distributors, we also provide a wider variety of signed +hashes in the `hashes.asc` file. Those hashes are in the tagged BSD format, but +can be verified with Perl's `shasum` or the GNU hash utilities, just like the +ones in `sha256sums.asc`. + +## Example Usage + +To begin using Git LFS within a Git repository that is not already configured +for Git LFS, you can indicate which files you would like Git LFS to manage. +This can be done by running the following _from within a Git repository_: + +```bash +$ git lfs track "*.psd" +``` + +(Where `*.psd` is the pattern of filenames that you wish to track. You can read +more about this pattern syntax +[here](https://git-scm.com/docs/gitattributes)). + +> *Note:* the quotation marks surrounding the pattern are important to +> prevent the glob pattern from being expanded by the shell. + +After any invocation of `git-lfs-track(1)` or `git-lfs-untrack(1)`, you _must +commit changes to your `.gitattributes` file_. This can be done by running: + +```bash +$ git add .gitattributes +$ git commit -m "track *.psd files using Git LFS" +``` + +You can now interact with your Git repository as usual, and Git LFS will take +care of managing your large files. For example, changing a file named `my.psd` +(tracked above via `*.psd`): + +```bash +$ git add my.psd +$ git commit -m "add psd" +``` + +> _Tip:_ if you have large files already in your repository's history, `git lfs +> track` will _not_ track them retroactively. To migrate existing large files +> in your history to use Git LFS, use `git lfs migrate`. For example: +> +> ``` +> $ git lfs migrate import --include="*.psd" --everything +> ``` +> +> **Note that this will rewrite history and change all of the Git object IDs in your +> repository, just like the export version of this command.** +> +> For more information, read [`git-lfs-migrate(1)`](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.1.ronn). + +You can confirm that Git LFS is managing your PSD file: + +```bash +$ git lfs ls-files +3c2f7aedfb * my.psd +``` + +Once you've made your commits, push your files to the Git remote: + +```bash +$ git push origin main +Uploading LFS objects: 100% (1/1), 810 B, 1.2 KB/s +# ... +To https://github.com/git-lfs/git-lfs-test + 67fcf6a..47b2002 main -> main +``` + +Note: Git LFS requires at least Git 1.8.2 on Linux or 1.8.5 on macOS. + +### Uninstalling + +If you've decided that Git LFS isn't right for you, you can convert your +repository back to a plain Git repository with `git lfs migrate` as well. For +example: + +```ShellSession +$ git lfs migrate export --include="*.psd" --everything +``` + +**Note that this will rewrite history and change all of the Git object IDs in your +repository, just like the import version of this command.** + +If there's some reason that things aren't working out for you, please let us +know in an issue, and we'll definitely try to help or get it fixed. 
+ +## Limitations + +Git LFS maintains a list of currently known limitations, which you can find and +edit [here](https://github.com/git-lfs/git-lfs/wiki/Limitations). + +Git LFS source code utilizes Go modules in its build system, and therefore this +project contains a `go.mod` file with a defined Go module path. However, we +do not maintain a stable Go language API or ABI, as Git LFS is intended to be +used solely as a compiled binary utility. Please do not import the `git-lfs` +module into other Go code and do not rely on it as a source code dependency. + +## Need Help? + +You can get help on specific commands directly: + +```bash +$ git lfs help +``` + +The [official documentation](docs) has command references and specifications for +the tool. There's also a [FAQ](https://github.com/git-lfs/git-lfs/wiki/FAQ) on +the wiki which answers some common questions. + +If you have a question on how to use Git LFS, aren't sure about something, or +are looking for input from others on tips about best practices or use cases, +feel free to +[start a discussion](https://github.com/git-lfs/git-lfs/discussions). + +You can always [open an issue](https://github.com/git-lfs/git-lfs/issues), and +one of the Core Team members will respond to you. Please be sure to include: + +1. The output of `git lfs env`, which displays helpful information about your + Git repository useful in debugging. +2. Any failed commands re-run with `GIT_TRACE=1` in the environment, which + displays additional information pertaining to why a command crashed. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for info on working on Git LFS and +sending patches. Related projects are listed on the [Implementations wiki +page](https://github.com/git-lfs/git-lfs/wiki/Implementations). + +See also [SECURITY.md](SECURITY.md) for info on how to submit reports +of security vulnerabilities. + +## Core Team + +These are the humans that form the Git LFS core team, which runs the project. + +In alphabetical order: + +| [@bk2204][bk2204-user] | [@chrisd8088][chrisd8088-user] | [@larsxschneider][larsxschneider-user] | +| :---: | :---: | :---: | +| [![][bk2204-img]][bk2204-user] | [![][chrisd8088-img]][chrisd8088-user] | [![][larsxschneider-img]][larsxschneider-user] | +| [PGP 0223B187][bk2204-pgp] | [PGP 088335A9][chrisd8088-pgp] | [PGP A5795889][larsxschneider-pgp] | + +[bk2204-img]: https://avatars1.githubusercontent.com/u/497054?s=100&v=4 +[chrisd8088-img]: https://avatars1.githubusercontent.com/u/28857117?s=100&v=4 +[larsxschneider-img]: https://avatars1.githubusercontent.com/u/477434?s=100&v=4 +[bk2204-user]: https://github.com/bk2204 +[chrisd8088-user]: https://github.com/chrisd8088 +[larsxschneider-user]: https://github.com/larsxschneider +[bk2204-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x88ace9b29196305ba9947552f1ba225c0223b187 +[chrisd8088-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x86cd3297749375bcf8206715f54fe648088335a9 +[larsxschneider-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xaa3b3450295830d2de6db90caba67be5a5795889 + +### Alumni + +These are the humans that have in the past formed the Git LFS core team, or +have otherwise contributed a significant amount to the project. Git LFS would +not be possible without them. 
+ +In alphabetical order: + +| [@andyneff][andyneff-user] | [@PastelMobileSuit][PastelMobileSuit-user] | [@rubyist][rubyist-user] | [@sinbad][sinbad-user] | [@technoweenie][technoweenie-user] | [@ttaylorr][ttaylorr-user] | +| :---: | :---: | :---: | :---: | :---: | :---: | +| [![][andyneff-img]][andyneff-user] | [![][PastelMobileSuit-img]][PastelMobileSuit-user] | [![][rubyist-img]][rubyist-user] | [![][sinbad-img]][sinbad-user] | [![][technoweenie-img]][technoweenie-user] | [![][ttaylorr-img]][ttaylorr-user] | + +[andyneff-img]: https://avatars1.githubusercontent.com/u/7596961?v=3&s=100 +[PastelMobileSuit-img]: https://avatars2.githubusercontent.com/u/37254014?s=100&v=4 +[rubyist-img]: https://avatars1.githubusercontent.com/u/143?v=3&s=100 +[sinbad-img]: https://avatars1.githubusercontent.com/u/142735?v=3&s=100 +[technoweenie-img]: https://avatars3.githubusercontent.com/u/21?v=3&s=100 +[ttaylorr-img]: https://avatars2.githubusercontent.com/u/443245?s=100&v=4 +[andyneff-user]: https://github.com/andyneff +[PastelMobileSuit-user]: https://github.com/PastelMobileSuit +[sinbad-user]: https://github.com/sinbad +[rubyist-user]: https://github.com/rubyist +[technoweenie-user]: https://github.com/technoweenie +[ttaylorr-user]: https://github.com/ttaylorr diff --git a/git-lfs-3.2.0/git-lfs b/git-lfs-3.2.0/git-lfs new file mode 100644 index 0000000000000000000000000000000000000000..96092eb2c4ad5c0737aa541dff7d352baa28fe0c --- /dev/null +++ b/git-lfs-3.2.0/git-lfs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05309d4bc952e77251de76d8dda6bfbb9e9f532757abfc41a454f0e8327d7e33 +size 10768384 diff --git a/git-lfs-3.2.0/install.sh b/git-lfs-3.2.0/install.sh new file mode 100644 index 0000000000000000000000000000000000000000..28e45f1b6f474e039cf05700e3fd43c85876dc15 --- /dev/null +++ b/git-lfs-3.2.0/install.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -eu + +prefix="/usr/local" + +if [ "${PREFIX:-}" != "" ] ; then + prefix=${PREFIX:-} +elif [ "${BOXEN_HOME:-}" != "" ] ; then + prefix=${BOXEN_HOME:-} +fi + +mkdir -p $prefix/bin +rm -rf $prefix/bin/git-lfs* + +pushd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null + for g in git*; do + install $g "$prefix/bin/$g" + done +popd > /dev/null + +PATH+=:$prefix/bin +git lfs install diff --git a/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html b/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html new file mode 100644 index 0000000000000000000000000000000000000000..9455cb3c1789d55ecfd8f6b132266540e4f3e2b3 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html @@ -0,0 +1,178 @@ + + + + + + git-lfs-checkout(1) - Update working copy with file content if available + + + + +
NAME
  git-lfs-checkout - Update working copy with file content if available

SYNOPSIS
  git lfs checkout [glob-pattern...]
  git lfs checkout --to file {--base|--ours|--theirs} conflict-obj-path

DESCRIPTION
  Try to ensure that the working copy contains file content for Git LFS objects
  for the current ref, if the object data is available. Does not download any
  content; see git-lfs-fetch(1) for that.

  Checkout scans the current ref for all LFS objects that would be required,
  then where a file is either missing in the working copy, or contains
  placeholder pointer content with the same SHA, the real file content is
  written, provided we have it in the local store. Modified files are never
  overwritten.

  One or more glob-patterns may be provided as arguments to restrict the set of
  files that are updated. Glob patterns are matched as per the format described
  in gitignore(5).

  When used with --to and the working tree is in a conflicted state due to a
  merge, this option checks out one of the three stages of a conflicting Git
  LFS object into a separate file (which can be outside of the work tree). This
  can make using diff tools to inspect and resolve merges easier. A single Git
  LFS object's file path must be provided in conflict-obj-path.

OPTIONS
  --base
    Check out the merge base of the specified file.

  --ours
    Check out our side (that of the current branch) of the conflict for the
    specified file.

  --theirs
    Check out their side (that of the other branch) of the conflict for the
    specified file.

  --to path
    If the working tree is in a conflicted state, check out the portion of the
    conflict specified by --base, --ours, or --theirs to the given path.

EXAMPLES
  Checkout all files that are missing or placeholders:

    $ git lfs checkout

  Checkout a specific couple of files:

    $ git lfs checkout path/to/file1.png path/to/file2.png

  Checkout a path with a merge conflict into separate files:

    # Attempt merge with a branch that has a merge conflict
    $ git merge conflicting-branch
    CONFLICT (content): Merge conflict in path/to/conflicting/file.dat

    # Checkout versions of the conflicting file into temp files
    $ git lfs checkout --to ours.dat --ours path/to/conflicting/file.dat
    $ git lfs checkout --to theirs.dat --theirs path/to/conflicting/file.dat

    # Compare conflicting versions in ours.dat and theirs.dat, then resolve
    # the conflict (e.g., by choosing one version over the other, or
    # creating a new version)

    # Cleanup and continue with merge
    $ rm ours.dat theirs.dat
    $ git add path/to/conflicting/file.dat
    $ git merge --continue

SEE ALSO
  git-lfs-fetch(1), git-lfs-pull(1), gitignore(5).

  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-clean.1.html b/git-lfs-3.2.0/man/html/git-lfs-clean.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c0ec25731e3e4155c968bb513db624acf78f06e4 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-clean.1.html @@ -0,0 +1,104 @@ + + + + + + git-lfs-clean(1) - Git clean filter that converts large files to pointers + + + + +
NAME
  git-lfs-clean - Git clean filter that converts large files to pointers

SYNOPSIS
  git lfs clean path

DESCRIPTION
  Read the contents of a large file from standard input, and write a Git LFS
  pointer file for that file to standard output.

  Clean is typically run by Git's clean filter, configured by the repository's
  Git attributes.

  Clean is not part of the user-facing Git plumbing commands. To preview the
  pointer of a large file as it would be generated, see the git-lfs-pointer(1)
  command.
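  For illustration, the pointer the clean filter would produce can be inspected
  by piping a file through it by hand. This is only a sketch: the file name is
  a placeholder, and the OID and size depend entirely on the actual file
  contents.

```ShellSession
$ git lfs clean my.psd < my.psd
version https://git-lfs.github.com/spec/v1
oid sha256:<64-character hash of the file contents>
size 1048576
```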
SEE ALSO
  git-lfs-install(1), git-lfs-push(1), git-lfs-pointer(1), gitattributes(5).

  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-clone.1.html b/git-lfs-3.2.0/man/html/git-lfs-clone.1.html new file mode 100644 index 0000000000000000000000000000000000000000..8a59b2146cb55656c21fcbf23e6431a8478d9505 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-clone.1.html @@ -0,0 +1,145 @@ + + + + + + git-lfs-clone(1) - Efficiently clone a LFS-enabled repository + + + + +
NAME
  git-lfs-clone - Efficiently clone an LFS-enabled repository

SYNOPSIS
  git lfs clone [git clone options] repository [directory]

DESCRIPTION
  Clone an LFS-enabled Git repository more efficiently by disabling LFS during
  the git clone, then performing a 'git lfs pull' directly afterwards.

  'git lfs clone' also installs all of the repo-level hooks (.git/hooks) that
  LFS requires to operate. If --separate-git-dir is given, the hooks will be
  installed there.

  This is faster than a regular 'git clone' because that will download LFS
  content using the smudge filter, which is executed individually per file in
  the working copy. This is relatively inefficient compared to the batch mode
  and parallel downloads performed by 'git lfs pull'.

OPTIONS
  All options supported by 'git clone', plus:

  -I paths, --include=paths
    See INCLUDE AND EXCLUDE.

  -X paths, --exclude=paths
    See INCLUDE AND EXCLUDE.

  --skip-repo
    Skip installing repo-level hooks (.git/hooks) that LFS requires. Disabled
    by default.

INCLUDE AND EXCLUDE
  You can configure Git LFS to only fetch objects to satisfy references in
  certain paths of the repo, and/or to exclude certain paths of the repo, to
  reduce the time you spend downloading things you do not use.

  In your Git configuration or in a .lfsconfig file, you may set either or both
  of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of paths.
  If lfs.fetchinclude is defined, Git LFS objects will only be fetched if their
  path matches one in that list, and if lfs.fetchexclude is defined, Git LFS
  objects will only be fetched if their path does not match one in that list.
  Paths are matched using wildcard matching as per gitignore(5).

  Note that using the command-line options -I and -X overrides the respective
  configuration settings. Setting either option to an empty string clears the
  value.
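  As a concrete sketch of the include/exclude flags described above (the
  repository URL and paths are placeholders, not real endpoints):

```ShellSession
$ git lfs clone -I "textures/" -X "media/reallybigfiles/" \
    https://example.com/art/assets.git
```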
SEE ALSO
  git-clone(1), git-lfs-pull(1), gitignore(5).

  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-config.5.html b/git-lfs-3.2.0/man/html/git-lfs-config.5.html new file mode 100644 index 0000000000000000000000000000000000000000..5c76e9666143b5f50b4f99fb0979bd845fe87e30 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-config.5.html @@ -0,0 +1,604 @@ + + + + + + git-lfs-config(5) - Configuration options for git-lfs + + + + +
NAME
  git-lfs-config - Configuration options for git-lfs

CONFIGURATION FILES
  git-lfs reads its configuration from any file supported by git config -l,
  including all per-repository, per-user, and per-system Git configuration
  files.

  Additionally, a small number of settings can be specified in a file called
  .lfsconfig at the root of the repository; see the "LFSCONFIG" section for
  more details. This configuration file is useful for setting options such as
  the LFS URL or access type for all users of a repository, especially when
  these differ from the default. The .lfsconfig file uses the same format as
  .gitconfig.

  If the .lfsconfig file is missing, the index is checked for a version of the
  file, and that is used instead. If both are missing, HEAD is checked for the
  file. If the repository is bare, only HEAD is checked. This order may change
  for checkouts in the future to better match Git's behavior.

  Settings from Git configuration files override the .lfsconfig file. This
  allows you to override settings like lfs.url in your local environment
  without having to modify the .lfsconfig file.

  Most options regarding git-lfs are contained in the [lfs] section, meaning
  they are all named lfs.foo or similar, although occasionally an lfs option
  can be scoped inside the configuration for a remote.

LIST OF OPTIONS
General settings

  * lfs.url / remote.<remote>.lfsurl
    The url used to call the Git LFS remote API. Default blank (derive from
    clone URL).

  * lfs.pushurl / remote.<remote>.lfspushurl
    The url used to call the Git LFS remote API when pushing. Default blank
    (derive from either LFS non-push urls or clone url).

  * remote.lfsdefault
    The remote used to find the Git LFS remote API. lfs.url and
    branch.*.remote for the current branch override this setting. If this
    setting is not specified and there is exactly one remote, that remote is
    picked; otherwise, the default is origin.

  * remote.lfspushdefault
    The remote used to find the Git LFS remote API when pushing. lfs.url and
    branch.*.pushremote for the current branch override this setting. If this
    setting is not set, remote.pushdefault is used, or if that is not set, the
    order of selection is used as specified in remote.lfsdefault above.

  * lfs.dialtimeout
    Sets the maximum time, in seconds, that the HTTP client will wait to
    initiate a connection. This does not include the time to send a request
    and wait for a response. Default: 30 seconds.

  * lfs.tlstimeout
    Sets the maximum time, in seconds, that the HTTP client will wait for a
    TLS handshake. Default: 30 seconds.

  * lfs.activitytimeout / lfs.https://<host>.activitytimeout
    Sets the maximum time, in seconds, that the HTTP client will wait for the
    next tcp read or write. If < 1, no activity timeout is used at all.
    Default: 30 seconds.

  * lfs.keepalive
    Sets the maximum time, in seconds, for the HTTP client to maintain
    keepalive connections. Default: 30 minutes.

  * lfs.ssh.automultiplex
    When using the pure SSH-based protocol, whether to multiplex requests over
    a single connection when possible. This option requires the use of OpenSSH
    or a compatible SSH client. Default: true.

  * lfs.ssh.retries
    Specifies the number of times Git LFS will attempt to obtain authorization
    via SSH before aborting. Default: 5.

  * core.askpass, GIT_ASKPASS
    Given as a program and its arguments, this is invoked when authentication
    is needed against the LFS API. The contents of stdout are interpreted as
    the password.

  * lfs.cachecredentials
    Enables in-memory SSH and Git Credential caching for a single 'git lfs'
    command. Default: enabled.

  * lfs.storage
    Allows overriding the LFS storage directory. A non-absolute path is
    relativized to the inside of the Git repository directory (usually .git).
    Note: you should not run git lfs prune if you have different repositories
    sharing the same storage directory.
    Default: lfs in the Git repository directory (usually .git/lfs).

  * lfs.largefilewarning
    Warn when a file is 4 GiB or larger. Such files will be corrupted when
    using Windows (unless smudging is disabled) with a Git for Windows version
    less than 2.34.0 due to a limitation in Git. Default: true if the version
    is less than 2.34.0, false otherwise.
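  As a short sketch of the general settings above, these are ordinary git
  config keys; the example.com endpoints are placeholders:

```ShellSession
$ git config lfs.url "https://lfs.example.com/myteam/myrepo.git/info/lfs"
$ git config lfs.pushurl "https://lfs-push.example.com/myteam/myrepo.git/info/lfs"
$ git config lfs.dialtimeout 10
```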
Upload and download transfer settings

  These settings control how the upload and download of LFS content occurs.

  * lfs.concurrenttransfers
    The number of concurrent uploads/downloads. Default 8.

  * lfs.basictransfersonly
    If set to true, only basic HTTP upload/download transfers will be used,
    ignoring any more advanced transfers that the client/server may support.
    This is primarily to work around bugs or incompatibilities.
    The git-lfs client supports basic HTTP downloads, resumable HTTP downloads
    (using Range headers), and resumable uploads via the tus.io protocol.
    Custom transfer methods can be added via lfs.customtransfer (see next
    section). However, setting this value to true limits the client to simple
    HTTP.

  * lfs.tustransfers
    If set to true, this enables resumable uploads of LFS objects through the
    tus.io API. Once this feature is finalized, this setting will be removed,
    and tus.io uploads will be available for all clients.

  * lfs.standalonetransferagent
    Allows the specified custom transfer agent to be used directly for
    transferring files, without asking the server how the transfers should be
    made. The custom transfer agent has to be defined in a
    lfs.customtransfer.<name> settings group.

  * lfs.customtransfer.<name>.path
    lfs.customtransfer.<name> is a settings group which defines a custom
    transfer hook which allows you to upload/download via an intermediate
    process, using any mechanism you like (rather than just HTTP). path should
    point to the process you wish to invoke. The protocol between the git-lfs
    client and the custom transfer process is documented at
    https://github.com/git-lfs/git-lfs/blob/main/docs/custom-transfers.md
    name must be a unique identifier that the LFS server understands. When
    calling the LFS API the client will include a list of supported transfer
    types. If the server also supports this named transfer type, it will
    select it and actions returned from the API will be in relation to that
    transfer type (may not be traditional URLs for example). Only if the
    server accepts name as a transfer it supports will this custom transfer
    process be invoked.

  * lfs.customtransfer.<name>.args
    If the custom transfer process requires any arguments, these can be
    provided here. This string will be expanded by the shell.

  * lfs.customtransfer.<name>.concurrent
    If true (the default), git-lfs will invoke the custom transfer process
    multiple times in parallel, according to lfs.concurrenttransfers,
    splitting the transfer workload between the processes.

  * lfs.customtransfer.<name>.direction
    Specifies which direction the custom transfer process supports, either
    "download", "upload", or "both". The default if unspecified is "both".

  * lfs.transfer.maxretries
    Specifies how many retries LFS will attempt per OID before marking the
    transfer as failed. Must be an integer which is at least one. If the value
    is not an integer, is less than one, or is not given, a value of eight
    will be used instead.

  * lfs.transfer.maxretrydelay
    Specifies the maximum time in seconds LFS will wait between each retry
    attempt. LFS uses exponential backoff for retries, doubling the time
    between each retry until reaching this limit. If a server requests a delay
    using the Retry-After header, the header value overrides the exponential
    delay for that attempt and is not limited by this option.
    Must be an integer which is not negative. Use zero to disable delays
    between retries unless requested by a server. If the value is not an
    integer, is negative, or is not given, a value of ten will be used
    instead.

  * lfs.transfer.maxverifies
    Specifies how many verification requests LFS will attempt per OID before
    marking the transfer as failed, if the object has a verification action
    associated with it. Must be an integer which is at least one. If the value
    is not an integer, is less than one, or is not given, a default value of
    three will be used instead.

  * lfs.transfer.enablehrefrewrite
    If set to true, this enables rewriting the href of LFS objects using
    url.*.insteadof/pushinsteadof config. pushinsteadof is used only for
    uploading, and insteadof is used for downloading and for uploading when
    pushinsteadof is not set.
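  A sketch of wiring up the custom transfer keys above; the agent name
  "lfs-folder" and its path and arguments are hypothetical placeholders, not a
  shipped tool:

```ShellSession
$ git config lfs.customtransfer.lfs-folder.path /usr/local/bin/lfs-folder-agent
$ git config lfs.customtransfer.lfs-folder.args "--root /mnt/lfs-mirror"
$ git config lfs.customtransfer.lfs-folder.direction both
$ git config lfs.customtransfer.lfs-folder.concurrent true
# Optionally skip server negotiation and always use this agent:
$ git config lfs.standalonetransferagent lfs-folder
```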
Push settings

  * lfs.allowincompletepush
    When pushing, allow objects to be missing from the local cache without
    halting a Git push. Default: false.

Fetch settings

  * lfs.fetchinclude
    When fetching, only download objects which match any entry on this
    comma-separated list of paths/filenames. Wildcard matching is as per
    gitignore(5). See git-lfs-fetch(1) for examples.

  * lfs.fetchexclude
    When fetching, do not download objects which match any item on this
    comma-separated list of paths/filenames. Wildcard matching is as per
    gitignore(5). See git-lfs-fetch(1) for examples.

  * lfs.fetchrecentrefsdays
    If non-zero, fetches refs which have commits within N days of the current
    date. Only local refs are included unless lfs.fetchrecentremoterefs is
    true. Also used as a basis for pruning old files. The default is 7 days.

  * lfs.fetchrecentremoterefs
    If true, fetches remote refs (for the remote you're fetching) as well as
    local refs in the recent window. This is useful to fetch objects for
    remote branches you might want to check out later. The default is true;
    if you set this to false, fetching for those branches will only occur when
    you either check them out (losing the advantage of fetch --recent), or
    create a tracking local branch separately then fetch again.

  * lfs.fetchrecentcommitsdays
    In addition to fetching at refs, also fetches previous changes made within
    N days of the latest commit on the ref. This is useful if you're often
    reviewing recent changes. Also used as a basis for pruning old files. The
    default is 0 (no previous changes).

  * lfs.fetchrecentalways
    Always operate as if --recent was included in a git lfs fetch call.
    Default false.

Prune settings

  * lfs.pruneoffsetdays
    The number of days added to the lfs.fetchrecent* settings to determine
    what can be pruned. Default is 3 days, i.e. anything fetched at the very
    oldest edge of the 'recent window' is eligible for pruning 3 days later.

  * lfs.pruneremotetocheck
    Set the remote that LFS files must have been pushed to in order for them
    to be considered eligible for local pruning. Also the remote which is
    called if --verify-remote is enabled.

  * lfs.pruneverifyremotealways
    Always run git lfs prune as if --verify-remote was provided.
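  For illustration, a sketch that narrows automatic fetching to a couple of
  paths and widens the "recent" window using the fetch and prune settings
  above (the path values are placeholders):

```ShellSession
$ git config lfs.fetchinclude "textures,models"
$ git config lfs.fetchexclude "media/reallybigfiles"
$ git config lfs.fetchrecentrefsdays 14
$ git config lfs.pruneoffsetdays 7
```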
Extensions

  * lfs.extension.<name>.<setting>
    Git LFS extensions enable the manipulation of file streams during smudge
    and clean. name groups the settings for a single extension, and the
    settings are:
    * clean    The command which runs when files are added to the index
    * smudge   The command which runs when files are written to the working
               copy
    * priority The order of this extension compared to others
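  As a sketch only, an extension's settings are plain git config keys; the
  extension name "foo" and the foo-clean/foo-smudge commands below are
  hypothetical placeholders (see the Git LFS extension documentation for the
  exact invocation contract):

```ShellSession
$ git config lfs.extension.foo.clean "foo-clean %f"
$ git config lfs.extension.foo.smudge "foo-smudge %f"
$ git config lfs.extension.foo.priority 0
```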
Other settings

  * lfs.<url>.access
    Note: this setting is normally set by LFS itself on receiving a 401
    response (authentication required); you don't normally need to set it
    manually.
    If set to "basic" then credentials will be requested before making batch
    requests to this url, otherwise a public request will initially be
    attempted.

  * lfs.<url>.locksverify
    Determines whether locks are checked before Git pushes. This prevents you
    from pushing changes to files that other users have locked. The Git LFS
    pre-push hook varies its behavior based on the value of this config key.
    * null - In the absence of a value, Git LFS will attempt the call, and
      warn if it returns an error. If the response is valid, Git LFS will set
      the value to true, and will halt the push if the user attempts to update
      a file locked by another user. If the server returns a 501 Not
      Implemented response, Git LFS will set the value to false.
    * true - Git LFS will attempt to verify locks, halting the Git push if
      there are any server issues, or if the user attempts to update a file
      locked by another user.
    * false - Git LFS will completely skip the lock check in the pre-push
      hook. You should set this if you're not using File Locking, or your Git
      server verifies locked files on pushes automatically.
    Supports URL config lookup as described in:
    https://git-scm.com/docs/git-config#git-config-httplturlgt. To set this
    value per-host: git config --global lfs.https://github.com/.locksverify
    [true|false].

  * lfs.<url>.contenttype
    Determines whether Git LFS should attempt to detect an appropriate HTTP
    Content-Type header when uploading using the 'basic' upload adapter. If
    set to false, the default header of Content-Type: application/octet-stream
    is chosen instead. Default: 'true'.

  * lfs.skipdownloaderrors
    Causes Git LFS not to abort the smudge filter when a download error is
    encountered, which allows actions such as checkout to work when you are
    unable to download the LFS content. LFS files which could not download
    will contain pointer content instead.
    Note that this will result in git commands which call the smudge filter
    reporting success even in cases when LFS downloads fail, which may affect
    scripts.
    You can also set the environment variable GIT_LFS_SKIP_DOWNLOAD_ERRORS=1
    to get the same effect.

  * GIT_LFS_PROGRESS
    This environment variable causes Git LFS to emit progress updates to an
    absolute file-path on disk when cleaning, smudging, or fetching.
    Progress is reported periodically in the form of a new line being appended
    to the end of the file. Each new line will take the following format:
    <direction> <current>/<total files> <downloaded>/<total> <name>
    Each field is described below:
    * direction:   The direction of transfer, either "checkout", "download",
                   or "upload".
    * current:     The index of the currently transferring file.
    * total files: The estimated count of all files to be transferred.
    * downloaded:  The number of bytes already downloaded.
    * total:       The entire size of the file, in bytes.
    * name:        The name of the file.

  * GIT_LFS_FORCE_PROGRESS / lfs.forceprogress
    Controls whether Git LFS will suppress progress status when the standard
    output stream is not attached to a terminal. The default is false, which
    makes Git LFS detect whether stdout is a terminal and suppress progress
    when it's not; you can disable this behaviour and force progress status
    even when the standard output stream is not a terminal by setting either
    variable to 1, 'yes' or 'true'.

  * GIT_LFS_SKIP_SMUDGE
    Sets whether or not Git LFS will skip attempting to convert pointers of
    tracked files into their corresponding objects when checked out into a
    working copy. If 'true', '1', 'on', or similar, Git LFS will skip the
    smudge process in both git lfs smudge and git lfs filter-process. If
    unset, or set to 'false', '0', 'off', or similar, Git LFS will smudge
    files as normal.

  * GIT_LFS_SKIP_PUSH
    Sets whether or not Git LFS will attempt to upload new Git LFS objects in
    a pre-push hook. If 'true', '1', 'on', or similar, Git LFS will skip the
    pre-push hook, so no new Git LFS objects will be uploaded. If unset, or
    set to 'false', '0', 'off', or similar, Git LFS will proceed as normal.

  * GIT_LFS_SET_LOCKABLE_READONLY / lfs.setlockablereadonly
    These settings, the first an environment variable and the second a
    gitconfig setting, control whether files marked as 'lockable' in git lfs
    track are made read-only in the working copy when not locked by the
    current user. The default is true; you can disable this behaviour and have
    all files writeable by setting either variable to 0, 'no' or 'false'.

  * lfs.lockignoredfiles
    This setting controls whether Git LFS will set ignored files that match
    the lockable pattern read-only as well as tracked files. The default is
    false; you can enable this behavior by setting the variable to 1, 'yes',
    or 'true'.

  * lfs.defaulttokenttl
    This setting sets a default token TTL when git-lfs-authenticate does not
    include the TTL in the JSON response but still enforces it.
    Note that this is only necessary for larger repositories hosted on LFS
    servers that don't include the TTL.
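  Of the variables above, GIT_LFS_SKIP_SMUDGE is commonly used to clone a
  large repository without downloading any LFS content up front and to pull it
  later on demand; a sketch, with a placeholder repository URL and path:

```ShellSession
$ GIT_LFS_SKIP_SMUDGE=1 git clone https://example.com/big/assets.git
$ cd assets
$ git lfs pull --include="textures/"
```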
LFSCONFIG
  The .lfsconfig file in a repository is read and interpreted in the same
  format as the file stored in .git/config. It allows a subset of keys to be
  used, including and limited to:

  * lfs.allowincompletepush
  * lfs.fetchexclude
  * lfs.fetchinclude
  * lfs.gitprotocol
  * lfs.locksverify
  * lfs.pushurl
  * lfs.skipdownloaderrors
  * lfs.url
  * lfs.{*}.access
  * remote.{name}.lfsurl

  The set of keys allowed in this file is restricted for security reasons.

EXAMPLES
  Configure a custom LFS endpoint for your repository:

    git config -f .lfsconfig lfs.url https://lfs.example.com/foo/bar/info/lfs
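  Since .lfsconfig uses the same syntax as .gitconfig, the command above
  roughly amounts to committing a file with the following INI-style section
  (sketch only):

```
[lfs]
	url = https://lfs.example.com/foo/bar/info/lfs
```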
SEE ALSO
  git-config(1), git-lfs-install(1), gitattributes(5), gitignore(5).

  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html b/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html new file mode 100644 index 0000000000000000000000000000000000000000..fb5ecb47799b9fa3c44d04b6cd21f7d11a53a365 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html @@ -0,0 +1,102 @@ + + + + + + git-lfs-dedup(1) - Deduplicate Git LFS files + + + + +
NAME
  git-lfs-dedup - Deduplicate Git LFS files

SYNOPSIS
  git lfs dedup

DESCRIPTION
  Deduplicates storage by re-creating working tree files as clones of the
  files in the Git LFS storage directory, using the operating system's
  copy-on-write file creation functionality.

  If the operating system or file system doesn't support copy-on-write file
  creation, this command exits unsuccessfully.

  This command will also exit without success if any Git LFS extensions are
  configured, as these will typically be used to alter the file contents
  before they are written to the Git LFS storage directory, and therefore the
  working tree files should not be copy-on-write clones of the LFS object
  files.

SEE ALSO
  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-env.1.html b/git-lfs-3.2.0/man/html/git-lfs-env.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b4d9d07f040fcbf25e3e532a3522b82227a9388f --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-env.1.html @@ -0,0 +1,94 @@ + + + + + + git-lfs-env(1) - Display the Git LFS environment + + + + +
NAME
  git-lfs-env - Display the Git LFS environment

SYNOPSIS
  git lfs env

DESCRIPTION
  Display the current Git LFS environment.

SEE ALSO
  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-ext.1.html b/git-lfs-3.2.0/man/html/git-lfs-ext.1.html new file mode 100644 index 0000000000000000000000000000000000000000..24c4771ea9ad126f126493ea36030b93f85518f3 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-ext.1.html @@ -0,0 +1,112 @@ + + + + + + git-lfs-ext(1) - View extension details + + + + +
NAME
  git-lfs-ext - View extension details

SYNOPSIS
  git lfs ext list [name...]

DESCRIPTION
  Git LFS extensions enable the manipulation of file streams during smudge and
  clean.

EXAMPLES
  List details for all extensions:

    git lfs ext
    git lfs ext list

  List details for the specified extensions:

    git lfs ext list 'foo' 'bar'

SEE ALSO
  Part of the git-lfs(1) suite.
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html b/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html new file mode 100644 index 0000000000000000000000000000000000000000..1d9b0b3952213a44fb442f96bb3c253f5c067f91 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html @@ -0,0 +1,268 @@ + + + + + + git-lfs-fetch(1) - Download all Git LFS files for a given ref + + + + +
NAME
  git-lfs-fetch - Download all Git LFS files for a given ref

SYNOPSIS
  git lfs fetch [options] [remote [ref...]]

DESCRIPTION
  Download Git LFS objects at the given refs from the specified remote. See
  DEFAULT REMOTE and DEFAULT REFS for what happens if you don't specify.

  This does not update the working copy.
OPTIONS
  -I paths, --include=paths
    Specify lfs.fetchinclude just for this invocation; see INCLUDE AND
    EXCLUDE.

  -X paths, --exclude=paths
    Specify lfs.fetchexclude just for this invocation; see INCLUDE AND
    EXCLUDE.

  --recent
    Download objects referenced by recent branches & commits in addition to
    those that would otherwise be downloaded. See RECENT CHANGES.

  --all
    Download all objects that are referenced by any commit reachable from the
    refs provided as arguments. If no refs are provided, then all refs are
    fetched. This is primarily for backup and migration purposes. Cannot be
    combined with --recent or --include/--exclude. Ignores any globally
    configured include and exclude paths to ensure that all objects are
    downloaded.

  --prune, -p
    Prune old and unreferenced objects after fetching, equivalent to running
    git lfs prune afterwards. See git-lfs-prune(1) for more details.
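  For instance, a sketch that combines two of the flags above, fetching the
  current ref plus recent branches and then pruning old, already-pushed
  objects in one go:

```ShellSession
$ git lfs fetch --recent --prune
```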
INCLUDE AND EXCLUDE
  You can configure Git LFS to only fetch objects to satisfy references in
  certain paths of the repo, and/or to exclude certain paths of the repo, to
  reduce the time you spend downloading things you do not use.

  In your Git configuration or in a .lfsconfig file, you may set either or
  both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of
  paths. If lfs.fetchinclude is defined, Git LFS objects will only be fetched
  if their path matches one in that list, and if lfs.fetchexclude is defined,
  Git LFS objects will only be fetched if their path does not match one in
  that list. Paths are matched using wildcard matching as per gitignore(5).

  Note that using the command-line options -I and -X overrides the respective
  configuration settings. Setting either option to an empty string clears the
  value.

  Examples:

  * git config lfs.fetchinclude "textures,images/foo*"
    This will only fetch objects referenced in paths in the textures folder,
    and files called foo* in the images folder.

  * git config lfs.fetchinclude "*.jpg,*.png,*.tga"
    Only fetch JPG/PNG/TGA files, wherever they are in the repository.

  * git config lfs.fetchexclude "media/reallybigfiles"
    Don't fetch any LFS objects referenced in the folder media/reallybigfiles,
    but fetch everything else.

  * git config lfs.fetchinclude "media"
    git config lfs.fetchexclude "media/excessive"
    Only fetch LFS objects in the 'media' folder, but exclude those in one of
    its subfolders.
DEFAULT REMOTE

+ +

Without arguments, fetch downloads from the default remote. The default remote +is the same as for git fetch, i.e. based on the remote branch you're tracking +first, or origin otherwise.

+ +

DEFAULT REFS

+ +

If no refs are given as arguments, the currently checked out ref is used. In +addition, if enabled, recently changed refs and commits are also +included. See RECENT CHANGES for details.

+ +

RECENT CHANGES

+ +

If the --recent option is specified, or if the gitconfig option +lfs.fetchrecentalways is true, then after the current ref (or those in the +arguments) is fetched, we also search for 'recent' changes to fetch +objects for, so that it's more convenient to checkout or diff those commits +without incurring further downloads.

+ +

What changes are considered 'recent' is based on a number of gitconfig options:

+ +
    +
  • +

    lfs.fetchrecentrefsdays +If non-zero, includes branches which have commits within N days of the current +date. Only local refs are included unless lfs.fetchrecentremoterefs is true. +The default is 7 days.

    +
  • +
  • +

    lfs.fetchrecentremoterefs +If true, fetches remote refs (for the remote you're fetching) as well as local +refs in the recent window. This is useful to fetch objects for remote branches +you might want to check out later. The default is true; if you set this to +false, fetching for those branches will only occur when you either check them +out (losing the advantage of fetch --recent), or create a tracking local +branch separately then fetch again.

    +
  • +
  • +

    lfs.fetchrecentcommitsdays +In addition to fetching at branches, also fetches changes made within N +days of the latest commit on the branch. This is useful if you're often +reviewing recent changes. The default is 0 (no previous changes).

    +
  • +
  • +

    lfs.fetchrecentalways +Always operate as if --recent was provided on the command line.

    +
  • +
+ +
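As a minimal sketch (the values are arbitrary), the recent window can be widened and --recent made the default behaviour with:

git config lfs.fetchrecentrefsdays 14
git config lfs.fetchrecentcommitsdays 3
git config lfs.fetchrecentalways true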

EXAMPLES

+ +
    +
  • +

    Fetch the LFS objects for the current ref from default remote

    + +

    git lfs fetch

    +
  • +
  • +

    Fetch the LFS objects for the current ref AND recent changes from default +remote

    + +

    git lfs fetch --recent

    +
  • +
  • +

    Fetch the LFS objects for the current ref from a secondary remote 'upstream'

    + +

    git lfs fetch upstream

    +
  • +
  • +

    Fetch all the LFS objects from the default remote that are referenced by any +commit in the main and develop branches

    + +

    git lfs fetch --all origin main develop

    +
  • +
  • +

    Fetch the LFS objects for a branch from origin

    + +

    git lfs fetch origin mybranch

    +
  • +
  • +

    Fetch the LFS objects for 2 branches and a commit from origin

    + +

    git lfs fetch origin main mybranch e445b45c1c9c6282614f201b62778e4c0688b5c8

    +
  • +
+ +

SEE ALSO

+ +

git-lfs-checkout(1), git-lfs-pull(1), git-lfs-prune(1), gitconfig(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html b/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ba44d9098294ecd99832d155f94d076ff325c896 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html @@ -0,0 +1,127 @@ + + + + + + git-lfs-filter-process(1) - Git filter process that converts between pointer and actual content + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-filter-process - Git filter process that converts between pointer and actual content +

+

SYNOPSIS

+ +

git lfs filter-process
git lfs filter-process --skip

+ +

DESCRIPTION

+ +

Implement the Git process filter API, exchanging handshake messages and then +accepting and responding to requests to either clean or smudge a file.

+ +

filter-process is always run by Git's filter process, and is configured by the +repository's Git attributes.

+ +

In your Git configuration or in a .lfsconfig file, you may set either or +both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of +paths. If lfs.fetchinclude is defined, Git LFS pointer files will only be +replaced with the contents of the corresponding Git LFS object file if their +path matches one in that list, and if lfs.fetchexclude is defined, Git LFS +pointer files will only be replaced with the contents of the corresponding +Git LFS object file if their path does not match one in that list. Paths are +matched using wildcard matching as per gitignore(5). Git LFS pointer files +that are not replaced with the contents of their corresponding object files +are simply copied to standard output without change.
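For example (reusing the illustrative path from git-lfs-fetch(1)), pointers under an excluded directory are passed through to standard output unchanged:

git config lfs.fetchexclude "media/reallybigfiles"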

+ +

The filter process uses Git's pkt-line protocol to communicate, and is +documented in detail in gitattributes(5).

+ +

OPTIONS

+ +

Without any options, filter-process accepts and responds to requests normally.

+ +
+
--skip
+
Skip automatic downloading of objects on clone or pull.
+
GIT_LFS_SKIP_SMUDGE
+
Disables the smudging process. For more, see: git-lfs-config(5).
+
+ +

SEE ALSO

+ +

git-lfs-clean(1), git-lfs-install(1), git-lfs-smudge(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html b/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b0954e5e180f3f55d22a73036aadbf78c631b23b --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html @@ -0,0 +1,123 @@ + + + + + + git-lfs-fsck(1) - Check GIT LFS files for consistency + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-fsck - Check GIT LFS files for consistency +

+

SYNOPSIS

+ +

git lfs fsck [options] [revisions]

+ +

DESCRIPTION

+ +

Checks all Git LFS files in the current HEAD for consistency.

+ +

Corrupted files are moved to ".git/lfs/bad".

+ +

The revisions may be specified as either a single committish, in which case only +that commit is inspected; specified as a range of the form A..B (and only this +form), in which case that range is inspected; or omitted entirely, in which case +HEAD (and, for --objects, the index) is examined.

+ +

The default is to perform all checks.

+ +

In your Git configuration or in a .lfsconfig file, you may set +lfs.fetchexclude to a comma-separated list of paths. If lfs.fetchexclude +is defined, then any Git LFS files whose paths match one in that list will +not be checked for consistency. Paths are matched using wildcard matching as +per gitignore(5).

+ +

OPTIONS

+ +
+
--objects
+
Check that each object in HEAD matches its expected hash and that each object +exists on disk.
+
--pointers
+
Check that each pointer is canonical and that each file which should be stored +as a Git LFS file is so stored.
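For instance (the revision range is illustrative), to check only the pointers in the last ten commits:

git lfs fsck --pointers HEAD~10..HEAD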
+
+ +

SEE ALSO

+ +

git-lfs-ls-files(1), git-lfs-status(1), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-install.1.html b/git-lfs-3.2.0/man/html/git-lfs-install.1.html new file mode 100644 index 0000000000000000000000000000000000000000..9b27bf18928bff7c503ee0936710712033f169e3 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-install.1.html @@ -0,0 +1,143 @@ + + + + + + git-lfs-install(1) - Install Git LFS configuration. + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-install - Install Git LFS configuration. +

+

SYNOPSIS

+ +

git lfs install options

+ +

DESCRIPTION

+ +

Perform the following actions to ensure that Git LFS is set up properly:

+ +
    +
  • Set up the clean and smudge filters under the name "lfs" in the global Git +config.
  • +
  • Install a pre-push hook to run git-lfs-pre-push(1) for the current repository, +if run from inside one. If "core.hooksPath" is configured in any Git +configuration (and supported, i.e., the installed Git version is at least +2.9.0), then the pre-push hook will be installed to that directory instead.
  • +
+ +

OPTIONS

+ +

Without any options, git lfs install will only set up the "lfs" smudge and clean filters if they are not already set.

+ +
+
--force
+
Sets the "lfs" smudge and clean filters, overwriting existing values.
+
--local
+
Sets the "lfs" smudge and clean filters in the local repository's git + config, instead of the global git config (~/.gitconfig).
+
--worktree
+
Sets the "lfs" smudge and clean filters in the current working tree's + git config, instead of the global git config (~/.gitconfig) or local + repository's git config ($GIT_DIR/config). + If multiple working trees are in use, the Git config extension + worktreeConfig must be enabled to use this option. + If only one working tree is in use, --worktree has the same effect + as --local. + This option is only available if the installed Git version is at least + 2.20.0 and therefore supports the "worktreeConfig" extension.
+
--manual
+
Print instructions for manually updating your hooks to include git-lfs + functionality. Use this option if git lfs install fails because of existing + hooks and you want to retain their functionality.
+
--system
+
Sets the "lfs" smudge and clean filters in the system git config, e.g. /etc/gitconfig + instead of the global git config (~/.gitconfig).
+
--skip-smudge
+
Skips automatic downloading of objects on clone or pull. This requires a + manual "git lfs pull" every time a new commit is checked out on your + repository.
+
--skip-repo
+
Skips setup of the local repo; use if you want to install the global lfs + filters but not make changes to the current repo.
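For example, one possible combination sets the filters only for the current repository and defers object downloads until an explicit git lfs pull:

git lfs install --local --skip-smudge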
+
+ +

SEE ALSO

+ +

git-lfs-uninstall(1), git-worktree(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-lock.1.html b/git-lfs-3.2.0/man/html/git-lfs-lock.1.html new file mode 100644 index 0000000000000000000000000000000000000000..3782b204e89656e8ed0b12cf664d6dcb1408f37a --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-lock.1.html @@ -0,0 +1,116 @@ + + + + + + git-lfs-lock(1) - Set a file as "locked" on the Git LFS server + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-lock - Set a file as "locked" on the Git LFS server +

+

SYNOPSIS

+ +

git lfs lock options path

+ +

DESCRIPTION

+ +

Sets the given file path as "locked" against the Git LFS server, with the +intention of blocking attempts by other users to update the given path. Locking +a file requires the file to exist in the working copy.

+ +

Once locked, LFS will verify that Git pushes do not modify files locked by +other users. See the description of the lfs.<url>.locksverify config key in +git-lfs-config(5) for details.

+ +

OPTIONS

+ +
+
+-r name --remote=name +
+
Specify the Git LFS server to use. Ignored if the lfs.url config key is set.
+
--json
+
Writes lock info as JSON to STDOUT if the command exits successfully. Intended +for interoperation with external tools. If the command returns with a non-zero +exit code, plain text messages will be sent to STDERR.
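An illustrative invocation (the path is hypothetical) that locks a file and emits machine-readable output:

git lfs lock --json "images/banner.psd"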
+
+ +

SEE ALSO

+ +

git-lfs-unlock(1), git-lfs-locks(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-locks.1.html b/git-lfs-3.2.0/man/html/git-lfs-locks.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ac8b000e116ad55868e5cb8af5daddff2d4a316c --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-locks.1.html @@ -0,0 +1,138 @@ + + + + + + git-lfs-locks(1) - Lists currently locked files from the Git LFS server. + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-locks - Lists currently locked files from the Git LFS server. +

+

SYNOPSIS

+ +

git lfs locks options

+ +

DESCRIPTION

+ +

Lists current locks from the Git LFS server.

+ +

OPTIONS

+ +
+
+-r name --remote=name +
+
Specify the Git LFS server to use. Ignored if the lfs.url config key is set.
+
+-i <id> --id=<id> +
+
Specifies a lock by its ID. Returns a single result.
+
+-p <path> --path=<path> +
+
Specifies a lock by its path. Returns a single result.
+
--local
+
Lists only our own locks which are cached locally. Skips a remote call.
+
--cached
+
Lists cached locks from the last remote call. Contrary to --local, this will +include locks of other users as well. This option is intended to display the +last known locks in case you are offline. There is no guarantee that locks +on the server have not changed in the meanwhile.
+
--verify
+
Verifies the lock owner on the server and marks our own locks by 'O'. +Own locks are actually held by us and corresponding files can be updated for +the next push. All other locks are held by someone else. +Contrary to --local, this option will also detect locks which are held by us +despite no local lock information being available (e.g. because the file had +been locked from a different clone); +it will also detect 'broken' locks (e.g. if someone else has forcefully +unlocked our files).
+
+-l <num> --limit=<num> +
+
Specifies number of results to return.
+
--json
+
Writes lock info as JSON to STDOUT if the command exits successfully. Intended +for interoperation with external tools. If the command returns with a non-zero +exit code, plain text messages will be sent to STDERR.
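For instance, to verify lock ownership against the server while capping the number of results:

git lfs locks --verify --limit 10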
+
+ +

SEE ALSO

+ +

git-lfs-lock(1), git-lfs-unlock(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-logs.1.html b/git-lfs-3.2.0/man/html/git-lfs-logs.1.html new file mode 100644 index 0000000000000000000000000000000000000000..a2328604b550b70fb7694b00c78132a76f2014e9 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-logs.1.html @@ -0,0 +1,118 @@ + + + + + + git-lfs-logs(1) - Show errors from the git-lfs command + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-logs - Show errors from the git-lfs command +

+

SYNOPSIS

+ +

git lfs logs
+git lfs logs file
+git lfs logs clear
+git lfs logs boomtown

+ +

DESCRIPTION

+ +

Display errors from the git-lfs command. Any time it crashes, the details are +saved to ".git/lfs/logs".

+ +

COMMANDS

+ +
+
clear
+
Clears all of the existing logged errors.
+
boomtown
+
Triggers a dummy exception.
+
+ +

OPTIONS

+ +

Without any options, git lfs logs simply shows the list of error logs.

+ +
+
file
+
Shows the specified error log. Use "last" to show the most recent error.
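For example, to display the most recent error log:

git lfs logs last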
+
+ +

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html b/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html new file mode 100644 index 0000000000000000000000000000000000000000..2f92cfe91f037a4ba709e0e98552ada70b6bb4ed --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html @@ -0,0 +1,141 @@ + + + + + + git-lfs-ls-files(1) - Show information about Git LFS files in the index and working tree + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-ls-files - Show information about Git LFS files in the index and working tree +

+

SYNOPSIS

+ +

git lfs ls-files [ref]
+git lfs ls-files ref ref

+ +

DESCRIPTION

+ +

Display paths of Git LFS files that are found in the tree at the given +reference. If no reference is given, scan the currently checked-out branch. +If two references are given, the LFS files that are modified between the two +references are shown; deletions are not listed.
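For instance (the ref names are illustrative), to list the LFS files that differ between two refs:

git lfs ls-files main mybranch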

+ +

An asterisk (*) after the OID indicates a full object, a minus (-) indicates an +LFS pointer.

+ +

OPTIONS

+ +
+
+-l --long +
+
Show the entire 64-character OID, instead of just the first 10.
+
+-s --size +
+
Show the size of the LFS object in parentheses at the end of each line.
+
+-d --debug +
+
Show as much information as possible about an LFS file. This is intended for manual inspection; the exact format may change at any time.
+
+-a --all +
+
Inspects the full history of the repository, not the current HEAD (or other +provided reference). This will include previous versions of LFS objects that +are no longer found in the current tree.
+
--deleted
+
Shows the full history of the given reference, including objects that have +been deleted.
+
+-I paths --include=paths +
+
Include paths matching only these patterns; see [FETCH SETTINGS].
+
+-X paths --exclude=paths +
+
Exclude paths matching any of these patterns; see [FETCH SETTINGS].
+
+-n --name-only +
+
Show only the lfs tracked file names.

SEE ALSO
+
+ +

git-lfs-status(1), git-lfs-config(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html b/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html new file mode 100644 index 0000000000000000000000000000000000000000..029bd4f5e8a01267c355c28d18588a667727d375 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html @@ -0,0 +1,188 @@ + + + + + + git-lfs-merge-driver(1) - Merge text-based LFS files + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-merge-driver - Merge text-based LFS files +

+

SYNOPSIS

+ +

git lfs merge-driver options

+ +

DESCRIPTION

+ +

Merge text files stored in Git LFS using the default Git merge machinery, or a +custom merge driver if specified. Note that this, in general, does not support +partial renames or copies because Git does not support them in this case.

+ +

This program is intended to be invoked automatically by Git and not by users +manually. See CONFIGURATION for details on the configuration required for +that.

+ +

OPTIONS

+ +
    +
  • +

    --ancestor path +Specify the file containing the ancestor revision.

    +
  • +
  • +

    --current path +Specify the file containing the current revision.

    +
  • +
  • +

    --marker-size num +Specify the conflict marker size as an integer.

    +
  • +
  • +

    --other path +Specify the file containing the other revision.

    +
  • +
  • +

    --program program +Specify a command, which is passed to the shell after substitution, that +performs the actual merge. If this is not specified, git merge-file is +invoked with appropriate arguments to perform the merge of the file.

    + +

    See CONFIGURATION for the sequences which are substituted here.

    +
  • +
+ +

CONFIGURATION

+ +

Git allows the use of a custom merge driver for files based on the merge +attribute set in .gitattributes. By default, when using git lfs track, this +value is set to lfs.

+ +

Because Git LFS can be used to store both text and binary files and it isn't +always clear which behavior should be used, Git LFS does not enable this merge +driver by default. However, if you know that some or all of your files are text +files, then you can set the merge attribute for those files to lfs-text and +use git config to set the merge driver like so:

+ +

console +$ git config merge.lfs-text.driver 'git lfs merge-driver --ancestor %O --current %A --other %B --marker-size %L --output %A' +

+ +

This tells Git to invoke the custom Git LFS merge driver, which in turn uses +Git's merge machinery, to merge files where the merge attribute is set to +lfs-text. Note that lfs-text here is an example and any syntactically valid +value can be used.
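As a sketch (the *.dat pattern is an assumption), the matching .gitattributes entry for such text-based LFS files might look like:

*.dat filter=lfs diff=lfs merge=lfs-text -text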

+ +

If you are using a special type of file that needs rules different from Git's +standard merge machinery, you can also specify the --program option, which +is passed to sh after substituting its own percent-encoded escapes:

+ +
    +
  • +%A: the current version
  • +
  • +%B: the other version
  • +
  • +%D: the destination version
  • +
  • +%O: the ancestor version
  • +
  • +%L: the conflict marker size
  • +
+ +

Note that the percent sign must typically be doubled to prevent Git from +substituting its own values here. Therefore, specifying the default behavior +explicitly looks like this:

+ +

console +$ git config merge.lfs-text.driver \ + 'git lfs merge-driver --ancestor %O --current %A --other %B --marker-size %L --output %A --program '\''git merge-file --stdout --marker-size=%%L %%A %%O %%B >%%D'\''' +

+ +

The exit status from the custom command should be zero on success or non-zero on +conflicts or other failure.

+ +

Note that if no merge driver is specified for the value of the merge attribute +(as is the case by default with merge=lfs), then the default Git merge +strategy is used. For LFS files, this means that Git will try to merge the +pointer files, which usually is not useful.

+ +

SEE ALSO

+ +

git-merge(1), git-merge-file(1), gitattributes(5)

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html b/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e2eb84f41d9edbb8fe0afb6c20277d7ec7ae2964 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html @@ -0,0 +1,590 @@ + + + + + + git-lfs-migrate(1) - Migrate history to or from Git LFS + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-migrate - Migrate history to or from Git LFS +

+

SYNOPSIS

+ +

git lfs migrate mode [options] [--] [branch ...]

+ +

DESCRIPTION

+ +

Convert files in a Git repository to or from Git LFS pointers, or +summarize Git file sizes by file type. The import mode converts Git +files (i.e., blobs) to Git LFS, while the export mode does the reverse, +and the info mode provides an informational summary which may be useful +in deciding which files to import or export.

+ +

In all modes, by default git lfs migrate operates only on the currently +checked-out branch, and only on files (of any size and type) added in +commits which do not exist on any remote. Multiple options are available +to override these defaults.

+ +

When converting files to or from Git LFS, the git lfs migrate command will +only make changes to your local repository and working copy, never any +remotes. This is intentional as the import and export modes are +generally "destructive" in the sense that they rewrite your Git history, +changing commits and generating new commit SHAs. (The exception is the +"no-rewrite" import sub-mode; see IMPORT WITHOUT REWRITING HISTORY +for details.)

+ +

You should therefore always first commit or stash any uncommitted work +before using the import or export modes, and then validate the result of +the migration before pushing the changes to your remotes, for instance by +running the info mode and by examining your rewritten commit history.

+ +

Once you are satisfied with the changes, you will need to force-push the +new Git history of any rewritten branches to all your remotes. This is +a step which should be taken with care, since you will be altering the +Git history on your remotes.

+ +

To examine or modify files in branches other than the currently checked-out +one, branch refs may be specified directly, or provided in one or more +--include-ref options. They may also be excluded by prefixing them +with ^ or providing them in --exclude-ref options. Use the --everything +option to specify that all refs should be examined, including all remote refs. +See INCLUDE AND EXCLUDE REFERENCES for details.

+ +

For the info and import modes, all file types are considered by default; +while useful in the info mode, this is often not desirable when importing, +so either filename patterns (pathspecs) or the --fixup option should +normally be specified in that case. (At least one include pathspec is +required for the export mode.) Pathspecs may be defined using the +--include and --exclude options (-I and -X for short), as described +in INCLUDE AND EXCLUDE.

+ +

As typical Git LFS usage depends on tracking specific file types using +filename patterns defined in .gitattributes files, the git lfs migrate +command will examine, create, and modify .gitattributes files as +necessary. The .gitattributes files will always be assigned the default +read/write permissions mode (i.e., without execute permissions). Any +symbolic links with that name will cause the migration to halt prematurely.

+ +

The import mode (see IMPORT) will convert Git objects of the file types +specified (e.g., with --include) to Git LFS pointers, and will add entries +for those file types to .gitattributes files, creating those files if they +do not exist. The result should be as if git lfs track commands had been +run at the points in your Git history corresponding to where each type of +converted file first appears. The exception is if the --fixup option is +given, in which case the import mode will only examine any existing +.gitattributes files and then convert Git objects which should be tracked +by Git LFS but are not yet.

+ +

The export mode (see EXPORT) works as the reverse operation to the +import mode, converting any Git LFS pointers that match the file types +specified with --include, which must be given at least once. Note that +.gitattributes entries will not be removed, nor will the files; instead, +the export mode inserts "do not track" entries similar to those created +by the git lfs untrack command. The --remote option is available in +the export mode to specify the remote from which Git LFS objects should +be fetched if they do not exist in the local Git LFS object cache; if not +provided, origin is used by default.

+ +

The info mode (see INFO) summarizes by file type (i.e., by filename +extension) the total number and size of files in a repository. Note that +like the other two modes, by default the info mode operates only on +the currently checked-out branch and only on commits which do not exist on +any remote, so to get a summary of the entire repository across all branches, +use the --everything option. If objects have already been converted to +Git LFS pointers, then by default the size of the referenced objects is +totaled and reported separately. You may also choose to ignore them by +using --pointers=ignore or to treat the pointers as files by using +--pointers=no-follow. (The latter option is akin to how existing Git LFS +pointers were handled by the info mode in prior versions of Git LFS).

+ +

When using the --everything option, take note that it means all refs +(local and remote) will be considered, but not necessarily all file types. +The import and info modes consider all file types by default, although +the --include and --exclude options constrain this behavior. Also +note that after importing across all branches with the --everything option +(and then checking to ensure the results are satisfactory!) it may be +convenient to update multiple branches on your remotes by using the --all +option to git push.

+ +

Unless the --skip-fetch option is given, git lfs migrate always begins by +fetching updated lists of refs from all the remotes returned by git remote, +but as noted above, after making changes to your local Git history while +converting objects, it will never automatically push those changes to your +remotes.

+ +

MODES

+ +
    +
  • +

    info + Show information about repository size. See INFO.

    +
  • +
  • +

    import + Convert Git objects to Git LFS pointers. See IMPORT and + IMPORT WITHOUT REWRITING HISTORY.

    +
  • +
  • +

    export + Convert Git LFS pointers to Git objects. See EXPORT.

    +
  • +
+ +

OPTIONS

+ +
+
+-I paths --include=paths +
+
See INCLUDE AND EXCLUDE.
+
+-X paths --exclude=paths +
+
See INCLUDE AND EXCLUDE.
+
+--include-ref=refname +
+
See INCLUDE AND EXCLUDE REFERENCES.
+
+--exclude-ref=refname +
+
See INCLUDE AND EXCLUDE REFERENCES.
+
--skip-fetch
+
Assumes that the known set of remote references is complete, and should not + be refreshed when determining the set of "un-pushed" commits to migrate. Has + no effect when combined with --include-ref or --exclude-ref.
+
--everything
+
See INCLUDE AND EXCLUDE REFERENCES. + +

Note: Git refs are "case-sensitive" on all platforms in "packed form" (see git-pack-refs(1)). On "case-insensitive" file systems, e.g. NTFS on Windows or default APFS on macOS, git-lfs-migrate(1) would only migrate the first ref if two or more refs are equal except for upper/lower case letters.

+
+
--yes
+
Assume a yes answer to any prompts, permitting noninteractive use. + Currently, the only such prompt is the one asking whether to overwrite + (destroy) any working copy changes. Thus, specifying this option may cause + data loss if you are not careful.
+
[branch ...]
+
Migrate only the set of branches listed. If not given, git-lfs-migrate(1) + will migrate the currently checked out branch. + +

References beginning with ^ will be excluded, whereas branches that do not + begin with ^ will be included.

+ +

If any of --include-ref or --exclude-ref are given, the checked out + branch will not be appended, but branches given explicitly will be appended.

+
+
+ +

INFO

+ +

The info mode summarizes the sizes of file objects present in the Git history. +It supports all the core migrate options and these additional ones:

+ +
    +
  • +

    --above=<size> + Only count files whose individual filesize is above the given size. size + may be specified as a number of bytes, or a number followed by a storage + unit, e.g., "1b", "20 MB", "3 TiB", etc.

    + +

    If a set of files sharing a common extension has no files in that set whose individual size is above the given --above threshold, no entry for that set will be shown.

    +
  • +
  • +

    --top=<n> + Only display the top n entries, ordered by how many total files match the + given pathspec. The default is to show only the top 5 entries. When + existing Git LFS objects are found, an extra, separate "LFS Objects" line + is output in addition to the top n entries, unless the --pointers + option is used to change this behavior.

    +
  • +
  • +

    --unit=<unit> + Format the number of bytes in each entry as a quantity of the storage unit + provided. Valid units include: + * b, kib, mib, gib, tib, pib - for IEC storage units + * b, kb, mb, gb, tb, pb - for SI storage units

    + +

    If a --unit is not specified, the largest unit that can fit the number of + counted bytes as a whole number quantity is chosen.

    +
  • +
  • +

    --pointers=[follow|no-follow|ignore] + Treat existing Git LFS pointers in the history according to one of three + alternatives. In the default follow case, if any pointers are found, + an additional separate "LFS Objects" line item is output which summarizes + the total number and size of the Git LFS objects referenced by pointers. + In the ignore case, any pointers are simply ignored, while the no-follow + case replicates the behavior of the info mode in older Git LFS versions + and treats any pointers it finds as if they were regular files, so the + output totals only include the contents of the pointers, not the contents + of the objects to which they refer.

    +
  • +
  • +

    --fixup + Infer --include and --exclude filters on a per-commit basis based on the + .gitattributes files in a repository. In practice, this option counts any + filepaths which should be tracked by Git LFS according to the repository's + .gitattributes file(s), but aren't already pointers. The .gitattributes + files are not reported, in contrast to the normal output of the info mode. + This option is incompatible with explicitly given --include, --exclude + filters and with any --pointers setting other than ignore, hence + --fixup implies --pointers=ignore if it is not explicitly set.

    +
  • +
+ +

The format of the output shows the filename pattern, the total size of the +file objects (excluding those below the --above threshold, if one was +defined), and the ratio of the number of files above the threshold to the +total number of files; this ratio is also shown as a percentage. For example:

+ +

+*.gif 93 MB 9480/10504 files(s) 90% +*.png 14 MB 1732/1877 files(s) 92% +

+ +

By default only the top five entries are shown, but --top allows for +more or fewer to be output as desired.

+ +

IMPORT

+ +

The import mode migrates objects present in the Git history to pointer +files tracked and stored with Git LFS. It supports all the core migrate +options and these additional ones:

+ +
    +
  • +

    --verbose + Print the commit oid and filename of migrated files to STDOUT.

    +
  • +
  • +

    --above=<size> + Only migrate files whose individual filesize is above the given size. size + may be specified as a number of bytes, or a number followed by a storage + unit, e.g., "1b", "20 MB", "3 TiB", etc. This option cannot be used with + the --include, --exclude, and --fixup options.

    +
  • +
  • +

    --object-map=<path>
    Write to path a file with the mapping of each rewritten commit. The file format is CSV with this pattern: OLD-SHA,NEW-SHA

    +
  • +
  • +

    --no-rewrite + Migrate objects to Git LFS in a new commit without rewriting Git + history. Please note that when this option is used, the migrate import + command will expect a different argument list, specialized options will + become available, and the core migrate options will be ignored. See + IMPORT WITHOUT REWRITING HISTORY.

    +
  • +
  • +

    --fixup + Infer --include and --exclude filters on a per-commit basis based on the + .gitattributes files in a repository. In practice, this option imports any + filepaths which should be tracked by Git LFS according to the repository's + .gitattributes file(s), but aren't already pointers. This option is + incompatible with explicitly given --include, --exclude filters.

    +
  • +
+ +

If --no-rewrite is not provided and --include or --exclude (-I, -X, +respectively) are given, the .gitattributes will be modified to include any +new filepath patterns as given by those flags.

+ +

If --no-rewrite is not provided and neither of those flags are given, the +gitattributes will be incrementally modified to include new filepath extensions +as they are rewritten in history.

+ +

IMPORT WITHOUT REWRITING HISTORY

+ +

The import mode has a special sub-mode enabled by the --no-rewrite flag. +This sub-mode will migrate objects to pointers as in the base import +mode, but will do so in a new commit without rewriting Git history. When using +this sub-mode, the base migrate options, such as --include-ref, will be +ignored, as will those for the base import mode. The migrate command will +also take a different argument list. As a result of these changes, +--no-rewrite will only operate on the current branch - any other interested +branches must have the generated commit merged in.

+ +

The --no-rewrite sub-mode supports the following options and arguments:

+ +
    +
  • +

    -m <message> --message=<message> + Specifies a commit message for the newly created commit.

    +
  • +
  • +

    [file ...] + The list of files to import. These files must be tracked by patterns + specified in the gitattributes.

    +
  • +
+ +

If --message is given, the new commit will be created with the provided +message. If no message is given, a commit message will be generated based on the +file arguments.

+ +

EXPORT

+ +

The export mode migrates Git LFS pointer files present in the Git history out +of Git LFS, converting them into their corresponding object files. It supports +all the core migrate options and these additional ones:

+ +
    +
  • +

    --verbose + Print the commit oid and filename of migrated files to STDOUT.

    +
  • +
  • +

    --object-map=<path> + Write to path a file with the mapping of each rewritten commit. The file + format is CSV with this pattern: OLD-SHA,NEW-SHA

    +
  • +
  • +

    --remote=<git-remote> + Download LFS objects from the provided git-remote during the export. If + not provided, defaults to origin.

    +
  • +
+ +

The export mode requires at minimum a pattern provided with the --include +argument to specify which files to export. Files matching the --include +patterns will be removed from Git LFS, while files matching the --exclude +patterns will retain their Git LFS status. The export command will modify the +.gitattributes to set/unset any filepath patterns as given by those flags.

+ +

INCLUDE AND EXCLUDE

+ +

You can specify that git lfs migrate should only convert files whose +pathspec matches the --include glob patterns and does not match the +--exclude glob patterns, either to reduce total migration time or to only +migrate part of your repo. Multiple patterns may be given using commas +as delimiters.

+ +

Pattern matching is done so as to be functionally equivalent to the pattern +matching format of .gitattributes. In addition to simple file extension +matches (e.g., *.gif) patterns may also specify directory paths, in which +case the path/** format may be used to match recursively.
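For example (the directory names are illustrative), to migrate everything under assets/ except one of its subdirectories:

git lfs migrate import --include="assets/**" --exclude="assets/thumbnails/**"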

+ +

Note that this form of pattern matching for the --include and --exclude +options used by the git lfs migrate command is unique among the suite of +git lfs commands. Other commands which also take these options, such as +git lfs ls-files, use the gitignore(5) form of pattern matching instead.

+ +

INCLUDE AND EXCLUDE REFERENCES

+ +

You can specify that git lfs migrate should only convert files added +in commits reachable from certain references, namely those defined using one +or more --include-ref options, and should ignore files in commits reachable +from references defined in --exclude-ref options.

+ +

+ D---E---F + / \ + A---B------C refs/heads/my-feature + \ \ + \ refs/heads/main + \ + refs/remotes/origin/main +

+ +

In the above configuration, the following commits are reachable by each ref:

+ +

+refs/heads/main: C, B, A +refs/heads/my-feature: F, E, D, B, A +refs/remote/origin/main: A +

+ +

The following git lfs migrate options would, therefore, include commits +F, E, D, C, and B, but exclude commit A:

+ +

+ --include-ref=refs/heads/my-feature + --include-ref=refs/heads/main + --exclude-ref=refs/remotes/origin/main +

+ +

The presence of flag --everything indicates that all local and remote +references should be migrated.

+ +

EXAMPLES

+ +

Migrate unpushed commits

+ +

A common use case for the migrate command is to convert large Git objects to +LFS before pushing your commits. By default, it only scans commits that don't +exist on any remote, so long as the repository is non-bare.

+ +

First, run git lfs migrate info to list the file types taking up the most +space in your repository:

+ +

+$ git lfs migrate info +migrate: Fetching remote refs: ..., done +migrate: Sorting commits: ..., done +migrate: Examining commits: 100% (1/1), done +*.mp3 284 MB 1/1 files(s) 100% +*.pdf 42 MB 8/8 files(s) 100% +*.psd 9.8 MB 15/15 files(s) 100% +*.ipynb 6.9 MB 6/6 files(s) 100% +*.csv 5.8 MB 2/2 files(s) 100% +

+ +

Now, you can run git lfs migrate import to convert some file types to LFS:

+ +

+$ git lfs migrate import --include="*.mp3,*.psd" +migrate: Fetching remote refs: ..., done +migrate: Sorting commits: ..., done +migrate: Rewriting commits: 100% (1/1), done + main d2b959babd099fe70da1c1512e2475e8a24de163 -> 136e706bf1ae79643915c134e17a6c933fd53c61 +migrate: Updating refs: ..., done +

+ +

If after conversion you find that some files in your working directory have +been replaced with Git LFS pointers, this is normal, and the working copies +of these files can be repopulated with their full expected contents by using +git lfs checkout.

+ +

Migrate local history

+ +

You can also migrate the entire history of your repository:

+ +

# Check for large files and existing Git LFS objects in your local main branch
$ git lfs migrate info --include-ref=main

# Check for large files and existing Git LFS objects in every branch
$ git lfs migrate info --everything

# Check for large files in every branch, ignoring any existing Git LFS objects,
# and listing the top 100 or fewer results
$ git lfs migrate info --everything --pointers=ignore --top=100

+ +

The same flags will work in import mode:

+ +

# Convert all zip files in your main branch
$ git lfs migrate import --include-ref=main --include="*.zip"

# Convert all zip files in every local branch
$ git lfs migrate import --everything --include="*.zip"

# Convert all files over 100K in every local branch
$ git lfs migrate import --everything --above=100Kb

+ +

Note: This will require a force-push to any existing Git remotes. Using +the --all option when force-pushing may be convenient if many refs were +updated, e.g., after importing to Git LFS with the --everything option.

+ +

Migrate without rewriting local history

+ +

You can also migrate files without modifying the existing history of your +repository. Note that in the examples below, files in subdirectories are not +included because they are not explicitly specified.

+ +

Without a specified commit message:

+ +

+$ git lfs migrate import --no-rewrite test.zip *.mp3 *.psd +

+ +

With a specified commit message:

+ +

+$ git lfs migrate import --no-rewrite \ + -m "Import test.zip, .mp3, .psd files in root of repo" \ + test.zip *.mp3 *.psd +

+ +

SEE ALSO

+ +

git-lfs-checkout(1), git-lfs-ls-files(1), git-lfs-track(1), +git-lfs-untrack(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html b/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html new file mode 100644 index 0000000000000000000000000000000000000000..276503496572ab14db7fa78b739928c85f9c4af6 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html @@ -0,0 +1,125 @@ + + + + + + git-lfs-pointer(1) - Build, compare, and check pointers + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-pointer - Build, compare, and check pointers +

+

SYNOPSIS

+ +

git lfs pointer --file=path/to/file
+git lfs pointer --file=path/to/file --pointer=path/to/pointer
git lfs pointer --file=path/to/file --stdin
git lfs pointer --check --file=path/to/file

+ +

Description

+ +

Builds and optionally compares generated pointer files to ensure consistency +between different Git LFS implementations.

+ +

OPTIONS

+ +
+
--file
+
A local file to build the pointer from.
+
--pointer
+
A local file including the contents of a pointer generated from another + implementation. This is compared to the pointer generated from --file.
+
--stdin
+
Reads the pointer from STDIN to compare with the pointer generated from + --file.
+
--check
+
Reads the pointer from STDIN (if --stdin is given) or the filepath (if + --file) is given. If neither or both of --stdin and --file are given, + the invocation is invalid. Exits 0 if the data read is a valid Git LFS + pointer. Exits 1 otherwise.
+
+--strict:
+
+
--no-strict
+
In conjunction with --check, --strict verifies that the pointer is + canonical; that is, it would be the one created by Git LFS. If it is not, + exits 2. The default, for backwards compatibility, is --no-strict, but + this may change in a future version.
+
+ +

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html b/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c2a85cfa8a2adfc8c735529afbf1908324071984 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html @@ -0,0 +1,98 @@ + + + + + + git-lfs-post-checkout(1) - Git post-checkout hook implementation + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-post-checkout - Git post-checkout hook implementation +

+

SYNOPSIS

+ +

git lfs post-checkout rev_before ref_after is_branch_checkout

+ +

DESCRIPTION

+ +

Responds to Git post-checkout events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

SEE ALSO

+ +

git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html b/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html new file mode 100644 index 0000000000000000000000000000000000000000..12cace66629a086d3c969df4660ec9646ccbd70e --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html @@ -0,0 +1,103 @@ + + + + + + git-lfs-post-commit(1) - Git post-commit hook implementation + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-post-commit - Git post-commit hook implementation +

+

SYNOPSIS

+ +

git lfs post-commit

+ +

DESCRIPTION

+ +

Responds to Git post-commit events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

Where the git lfs post-merge command, which has a similar purpose, must +examine all files in the working copy, git lfs post-commit can limit +itself checking only those files which have changed in HEAD. It primarily +handles newly added lockable files which have not yet been made read-only.

+ +

SEE ALSO

+ +

git-lfs-post-merge(1), git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html b/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html new file mode 100644 index 0000000000000000000000000000000000000000..0b8c8f4d74fb25f3cf338347850221d18b930b36 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html @@ -0,0 +1,98 @@ + + + + + + git-lfs-post-merge(1) - Git post-merge hook implementation + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-post-merge - Git post-merge hook implementation +

+

SYNOPSIS

+ +

git lfs post-merge is_squash

+ +

DESCRIPTION

+ +

Responds to Git post-merge events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

SEE ALSO

+ +

git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html b/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b2ef773add8efbadaf1177864dcbf31c701ef53a --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html @@ -0,0 +1,119 @@ + + + + + + git-lfs-pre-push(1) - Git pre-push hook implementation + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-pre-push - Git pre-push hook implementation +

+

SYNOPSIS

+ +

git lfs pre-push remote [remoteurl]

+ +

DESCRIPTION

+ +

Responds to Git pre-hook events. It reads the range of commits from STDIN, in +the following format:

+ +
<local-ref> SP <local-sha1> SP <remote-ref> SP <remote-sha1> \n
+
+ +

It also takes the remote name and URL as arguments.

+ +

If any of those Git objects are associated with Git LFS objects, those +objects will be pushed to the Git LFS API.

+ +

In the case of pushing a new branch, the list of Git objects will be all of +the Git objects in this branch.

+ +

In the case of deleting a branch, no attempts to push Git LFS objects will be +made.

+ +

OPTIONS

+ +
+
GIT_LFS_SKIP_PUSH
+
Do nothing on pre-push. For more, see: git-lfs-config(5).
+
+ +

SEE ALSO

+ +

git-lfs-clean(1), git-lfs-push(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-prune.1.html b/git-lfs-3.2.0/man/html/git-lfs-prune.1.html new file mode 100644 index 0000000000000000000000000000000000000000..d215726dfa4a7eef61ea0e2bab40ff97bf8fcbe3 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-prune.1.html @@ -0,0 +1,245 @@ + + + + + + git-lfs-prune(1) - Delete old LFS files from local storage + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-prune - Delete old LFS files from local storage +

+

SYNOPSIS

+ +

git lfs prune options

+ +

DESCRIPTION

+ +

Deletes local copies of LFS files which are old, thus freeing up disk space. +Prune operates by enumerating all the locally stored objects, and then deleting +any which are not referenced by at least ONE of the following:

+ +
    +
  • the current checkout
  • +
  • all existing stashes
  • +
  • a 'recent branch'; see RECENT FILES +
  • +
  • a 'recent commit' on the current branch or recent branches; see RECENT FILES +
  • +
  • a commit which has not been pushed; see UNPUSHED LFS FILES +
  • +
  • any other worktree checkouts; see git-worktree(1) +
  • +
+ +

In general terms, prune will delete files you're not currently using and which +are not 'recent', so long as they've been pushed i.e. the local copy is not the +only one.

+ +

The reflog is not considered, only commits. Therefore LFS objects that are +only referenced by orphaned commits are always deleted.

+ +

Note: you should not run git lfs prune if you have different repositories +sharing the same custom storage directory; see git-lfs-config(5) for more +details about lfs.storage option.

+ +

In your Git configuration or in a .lfsconfig file, you may set +lfs.fetchexclude to a comma-separated list of paths. If lfs.fetchexclude +is defined, then any Git LFS files whose paths match one in that list will +be pruned unless they are referenced by a stash or an unpushed commit. +Paths are matched using wildcard matching as per gitignore(5).

+ +

OPTIONS

+ +
    +
  • +

    --dry-run -d +Don't actually delete anything, just report on what would have been done

    +
  • +
  • +

    --force -f +Prune all objects except unpushed objects, including objects required for +currently checked out refs. Implies --recent.

    +
  • +
  • +

    --recent +Prune even objects that would normally be preserved by the configuration +options specified below in RECENT FILES.

    +
  • +
  • +

    --verify-remote -c +Contact the remote and check that copies of the files we would delete +definitely exist before deleting. See VERIFY REMOTE.

    +
  • +
  • +

    --no-verify-remote +Disables remote verification if lfs.pruneverifyremotealways was enabled in +settings. See VERIFY REMOTE.

    +
  • +
  • +

    --verbose -v +Report the full detail of what is/would be deleted.

    +
  • +
+ +

RECENT FILES

+ +

Prune won't delete LFS files referenced by 'recent' commits, in case you want +to use them again without having to download. The definition of 'recent' is +derived from the one used by git-lfs-fetch(1) to download recent objects with +the --recent option, with an offset of a number of days (default 3) to ensure +that we always keep files you download for a few days.

+ +

Here are the git-config(1) settings that control this behaviour:

+ +
    +
  • +

    lfs.pruneoffsetdays
    +The number of extra days added to the fetch recent settings when using them +to decide when to prune. So for a reference to be considered old enough to +prune, it has to be this many days older than the oldest reference that would +be downloaded via git lfs fetch --recent. Only used if the relevant +fetch recent 'days' setting is non-zero. Default 3 days.

    +
  • +
  • +

    lfs.fetchrecentrefsdays
    +lfs.fetchrecentremoterefs
    +lfs.fetchrecentcommitsdays
    +These have the same meaning as git-lfs-fetch(1) with the --recent option, +they are used as a base for the offset above. Anything which falls outside +of this offsetted window is considered old enough to prune. If a day value is +zero, that condition is not used at all to retain objects and they will be +pruned.

    +
  • +
+ +
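As a minimal illustration (the value is arbitrary), the prune window can be widened and the result previewed before anything is deleted:

git config lfs.pruneoffsetdays 7
git lfs prune --dry-run --verbose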

UNPUSHED LFS FILES

+ +

When the only copy of an LFS file is local, and it is still reachable from any +reference, that file can never be pruned, regardless of how old it is.

+ +

To determine whether an LFS file has been pushed, we check the difference between local refs and remote refs; where the local ref is ahead, any LFS files referenced in those commits are unpushed and will not be deleted. This works because the LFS pre-push hook always ensures that LFS files are pushed before the remote branch is updated.

+ +

See DEFAULT REMOTE, for which remote is considered 'pushed' for pruning +purposes.

+ +

VERIFY REMOTE

+ +

The --verify-remote option calls the remote to ensure that any LFS files to be +deleted have copies on the remote before actually deleting them.

+ +

Usually the check performed by UNPUSHED LFS FILES is enough to determine that +files have been pushed, but if you want to be extra sure at the expense of extra +overhead you can make prune actually call the remote API and verify the +presence of the files you're about to delete locally. See DEFAULT REMOTE for +which remote is checked.

+ +

You can make this behaviour the default by setting lfs.pruneverifyremotealways +to true.

+ +

In addition to the overhead of calling the remote, using this option also +requires prune to distinguish between totally unreachable files (e.g. those that +were added to the index but never committed, or referenced only by orphaned +commits), and files which are still referenced, but by commits which are +prunable. This makes the prune process take longer.

+ +

DEFAULT REMOTE

+ +

When identifying UNPUSHED LFS FILES and performing VERIFY REMOTE, a single +remote, 'origin', is normally used as the reference. This one remote is +considered canonical; even if you use multiple remotes, you probably want to +retain your local copies until they've made it to that remote. 'origin' is used +by default because that will usually be a main central repo, or your fork of +it - in both cases that's a valid remote backup of your work. If origin doesn't +exist then by default nothing will be pruned because everything is treated as +'unpushed'.

+ +

You can alter the remote via git config: lfs.pruneremotetocheck. Set this +to a different remote name to check that one instead of 'origin'.
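For instance, assuming a remote named upstream is the canonical backup of your work:

git config lfs.pruneremotetocheck upstream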

+ +

SEE ALSO

+ +

git-lfs-fetch(1), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-pull.1.html b/git-lfs-3.2.0/man/html/git-lfs-pull.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e591d2681225dfc952919aacb399bd99d7a254e2 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-pull.1.html @@ -0,0 +1,143 @@ + + + + + + git-lfs-pull(1) - Download all Git LFS files for current ref &amp; checkout + + + + +
+ + + +
    +
+ + + +

NAME

+

+ git-lfs-pull - Download all Git LFS files for current ref & checkout +

+

SYNOPSIS

+ +

git lfs pull [options] [remote]

+ +

DESCRIPTION

+ +

Download Git LFS objects for the currently checked out ref, and update +the working copy with the downloaded content if required.

+ +

This is equivalent to running the following 2 commands:

+ +

git lfs fetch [options] [remote] +git lfs checkout

+ +

OPTIONS

+ +
+
+-I paths --include=paths +
+
Specify lfs.fetchinclude just for this invocation; see INCLUDE AND EXCLUDE +
+
+-X paths --exclude=paths +
+
Specify lfs.fetchexclude just for this invocation; see INCLUDE AND EXCLUDE +
+
+ +

INCLUDE AND EXCLUDE

+ +

You can configure Git LFS to only fetch objects to satisfy references in certain +paths of the repo, and/or to exclude certain paths of the repo, to reduce the +time you spend downloading things you do not use.

+ +

In your Git configuration or in a .lfsconfig file, you may set either or +both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of +paths. If lfs.fetchinclude is defined, Git LFS objects will only be fetched +if their path matches one in that list, and if lfs.fetchexclude is defined, +Git LFS objects will only be fetched if their path does not match one +in that list. Paths are matched using wildcard matching as per gitignore(5).

+ +

Note that using the command-line options -I and -X override the respective +configuration settings. Setting either option to an empty string clears the +value.

+ +
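As an illustration (the paths are hypothetical), you can persist include/exclude settings with git config, or pass them for a single invocation with -I/-X:

git config lfs.fetchinclude "textures,images/foo*"
git config lfs.fetchexclude "media/reallybigfiles"
git lfs pull --include="textures" --exclude="media/reallybigfiles"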

DEFAULT REMOTE

+ +

Without arguments, pull downloads from the default remote. The default remote is +the same as for git pull, i.e. based on the remote branch you're tracking +first, or origin otherwise.

+ +
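For instance, to pull from the default remote, or from a remote named 'upstream' (an example name):

git lfs pull
git lfs pull upstream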

SEE ALSO

+ +

git-lfs-fetch(1), git-lfs-checkout(1), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-push.1.html b/git-lfs-3.2.0/man/html/git-lfs-push.1.html new file mode 100644 index 0000000000000000000000000000000000000000..64b12c82534b118bb1e578836f077858b1808506 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-push.1.html @@ -0,0 +1,120 @@ + + + + + + git-lfs-push(1) - Push queued large files to the Git LFS endpoint + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-push - Push queued large files to the Git LFS endpoint +

+

SYNOPSIS

+ +

git lfs push options remote [ref...]
+git lfs push remote [ref...]
+git lfs push --object-id remote [oid...]

+ +

DESCRIPTION

+ +

Upload Git LFS files to the configured endpoint for the current Git remote. By +default, it filters out objects that are already referenced by the local clone +of the remote.

+ +

OPTIONS

+ +
+
--dry-run
+
Print the files that would be pushed, without actually pushing them.
+
--all
+
This pushes all objects to the remote that are referenced by any commit + reachable from the refs provided as arguments. If no refs are provided, then + all local refs are pushed. Note that this behavior differs from that of + git-lfs-fetch(1) when its --all option is used; in that case, all refs + are fetched, including refs other than those under refs/heads and + refs/tags. If you are migrating a repository with these commands, make + sure to run git lfs push for any additional remote refs that contain + Git LFS objects not reachable from your local refs.
+
--object-id
+
This pushes only the object OIDs listed at the end of the command, separated + by spaces.
+
+ +
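A few illustrative invocations (the branch name and OID are placeholders):

git lfs push origin main
git lfs push --dry-run origin main
git lfs push --object-id origin <oid>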

SEE ALSO

+ +

git-lfs-fetch(1), git-lfs-pre-push(1).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html b/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e0c2a6754208c607dc247e82e6830c1811b2e10f --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html @@ -0,0 +1,134 @@ + + + + + + git-lfs-smudge(1) - Git smudge filter that converts pointer in blobs to the actual content + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-smudge - Git smudge filter that converts pointer in blobs to the actual content +

+

SYNOPSIS

+ +

git lfs smudge [path] +git lfs smudge --skip [path]

+ +

DESCRIPTION

+ +

Read a Git LFS pointer file from standard input and write the contents +of the corresponding large file to standard output. If needed, +download the file's contents from the Git LFS endpoint. The path +argument, if provided, is only used for a progress bar.

+ +

Smudge is typically run by Git's smudge filter, configured by the repository's +Git attributes.

+ +
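As a rough sketch (the path is hypothetical), the filter can also be run by hand by piping a pointer blob into it:

git cat-file blob HEAD:media/logo.png | git lfs smudge media/logo.png > logo.png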

In your Git configuration or in a .lfsconfig file, you may set either or +both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of +paths. If lfs.fetchinclude is defined, Git LFS pointer files will only be +replaced with the contents of the corresponding Git LFS object file if their +path matches one in that list, and if lfs.fetchexclude is defined, Git LFS +pointer files will only be replaced with the contents of the corresponding +Git LFS object file if their path does not match one in that list. Paths are +matched using wildcard matching as per gitignore(5). Git LFS pointer files +that are not replaced with the contents of their corresponding object files +are simply copied to standard output without change.

+ +

OPTIONS

+ +

Without any options, git lfs smudge outputs the raw Git LFS content to +standard output.

+ +
+
--skip
+
Skip automatic downloading of objects on clone or pull.
+
GIT_LFS_SKIP_SMUDGE
+
Disables the smudging process. For more, see: git-lfs-config(5).
+
+ +
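For example, to clone a repository while leaving the pointer files in place (the URL is a placeholder):

GIT_LFS_SKIP_SMUDGE=1 git clone https://example.com/repo.git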

KNOWN BUGS

+ +

On Windows, Git before 2.34.0 does not handle files in the working tree larger +than 4 gigabytes. Newer versions of Git, as well as Unix versions, are +unaffected.

+ +

SEE ALSO

+ +

git-lfs-install(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html b/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html new file mode 100644 index 0000000000000000000000000000000000000000..d5400f3ce78619f2d4ef1138883832cd43c96bc2 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html @@ -0,0 +1,102 @@ + + + + + + git-lfs-standalone-file(1) - Standalone transfer adapter for file URLs + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-standalone-file - Standalone transfer adapter for file URLs +

+

SYNOPSIS

+ +

git lfs standalone-file

+ +

DESCRIPTION

+ +

Provides a standalone transfer adapter for file URLs (local paths).

+ +

By default, Git LFS requires the support of an HTTP server to implement the Git +LFS protocol. However, this tool allows the use of URLs starting with file:/// +(that is, those representing local paths) in addition. Configuration is not +necessary; Git LFS handles this internally.

+ +

When invoked, this tool speaks JSON on input and output as a standalone transfer +adapter. It is not intended for use by end users.

+ +
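As a minimal sketch (the path is hypothetical), a clone from a file:/// remote exercises this adapter without any extra configuration:

git clone file:///srv/git/project.git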

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-status.1.html b/git-lfs-3.2.0/man/html/git-lfs-status.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c1673778b63008ac79534f299403daab64b32bc7 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-status.1.html @@ -0,0 +1,123 @@ + + + + + + git-lfs-status(1) - Show the status of Git LFS files in the working tree + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-status - Show the status of Git LFS files in the working tree +

+

SYNOPSIS

+ +

git lfs status [options]

+ +

DESCRIPTION

+ +

Display paths of Git LFS objects that

+ +
  • have not been pushed to the Git LFS server. These are large files
    that would be uploaded by git push.

  • have differences between the index file and the current HEAD commit.
    These are large files that would be committed by git commit.

  • have differences between the working tree and the index file. These
    are files that could be staged using git add.
+ +

This command must be run in a non-bare repository.

+ +

OPTIONS

+ +
+
--porcelain
+
Give the output in an easy-to-parse format for scripts.
+
--json
+
Give the output in a stable json format for scripts.
+
+ +
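For example:

git lfs status --porcelain
git lfs status --json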

SEE ALSO

+ +

git-lfs-ls-files(1).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-track.1.html b/git-lfs-3.2.0/man/html/git-lfs-track.1.html new file mode 100644 index 0000000000000000000000000000000000000000..df3f9681ca8e4d8bf9a259a861e11d462b838a8f --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-track.1.html @@ -0,0 +1,179 @@ + + + + + + git-lfs-track(1) - View or add Git LFS paths to Git attributes + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-track - View or add Git LFS paths to Git attributes +

+

SYNOPSIS

+ +

git lfs track [options] [pattern...]

+ +

DESCRIPTION

+ +

Start tracking the given pattern(s) through Git LFS. The pattern argument is written to .gitattributes. If no paths are provided, simply list the currently-tracked paths.

+ +

The gitattributes documentation states +that patterns use the gitignore pattern rules +to match paths. This means that patterns which contain asterisk (*), question +mark (?), and the bracket characters ([ and ]) are treated specially; to +disable this behavior and treat them literally instead, use --filename or +escape the character with a backslash.

+ +

OPTIONS

+ +
  • --verbose -v:
    If enabled, have git lfs track log files which it will touch. Disabled by
    default.

  • --dry-run -d:
    If enabled, have git lfs track log all actions it would normally take
    (adding entries to .gitattributes, touching files on disk, etc) without
    performing any mutative operations to the disk.

    git lfs track --dry-run [files] also implicitly mocks the behavior of
    passing the --verbose, and will log in greater detail what it is doing.

    Disabled by default.

  • --filename
    Treat the arguments as literal filenames, not as patterns. Any special glob
    characters in the filename will be escaped when writing the .gitattributes
    file.

  • --lockable -l
    Make the paths 'lockable', meaning they should be locked to edit them, and
    will be made read-only in the working copy when not locked.

  • --not-lockable
    Remove the lockable flag from the paths so they are no longer read-only
    unless locked.

  • --no-excluded
    Do not list patterns that are excluded in the output; only list patterns
    that are tracked.

  • --no-modify-attrs
    Makes matched entries stat-dirty so that Git can re-index files you wish to
    convert to LFS. Does not modify any .gitattributes file(s).
+ +

EXAMPLES

+ +
+
List the patterns that Git LFS is currently tracking: + +

git lfs track

+
+
+
Configure Git LFS to track GIF files: + +

git lfs track "*.gif"

+
+
+
Configure Git LFS to track PSD files and make them read-only unless locked: + +

git lfs track --lockable "*.psd"

+
+
+
Configure Git LFS to track the file named project [1].psd: + +

git lfs track --filename "project [1].psd"

+
+
+
+ +

SEE ALSO

+ +

git-lfs-untrack(1), git-lfs-install(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html b/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html new file mode 100644 index 0000000000000000000000000000000000000000..24641d96d1a6e00e921427f05a4b2671f2221c7a --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html @@ -0,0 +1,126 @@ + + + + + + git-lfs-uninstall(1) - Remove Git LFS configuration + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-uninstall - Remove Git LFS configuration +

+

SYNOPSIS

+ +

git lfs uninstall

+ +

DESCRIPTION

+ +

Perform the following actions to remove the Git LFS configuration:

+ +
  • Remove the "lfs" clean and smudge filters from the global Git config.

  • Uninstall the Git LFS pre-push hook if run from inside a Git repository.
+ +

OPTIONS

+ +
+
--local
+
Removes the "lfs" smudge and clean filters from the local repository's git + config, instead of the global git config (~/.gitconfig).
+
--worktree
+
Removes the "lfs" smudge and clean filters from the current working tree's + git config, instead of the global git config (~/.gitconfig) or local + repository's git config ($GIT_DIR/config). + If multiple working trees are in use, the Git config extension + worktreeConfig must be enabled to use this option. + If only one working tree is in use, --worktree has the same effect + as --local. + This option is only available if the installed Git version is at least + 2.20.0 and therefore supports the "worktreeConfig" extension.
+
--system
+
Removes the "lfs" smudge and clean filters from the system git config, + instead of the global git config (~/.gitconfig).
+
--skip-repo
+
Skips cleanup of the local repo; use if you want to uninstall the global lfs + filters but not make changes to the current repo.
+
+ +
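For example, to remove the filters only from the current repository's configuration:

git lfs uninstall --local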

SEE ALSO

+ +

git-lfs-install(1), git-worktree(1).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html b/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ea3dec5fc97227c9a208baf1ae97236a707fbcc6 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html @@ -0,0 +1,120 @@ + + + + + + git-lfs-unlock(1) - Remove "locked" setting for a file on the Git LFS server + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-unlock - Remove "locked" setting for a file on the Git LFS server +

+

SYNOPSIS

+ +

git lfs unlock OPTIONS path

+ +

DESCRIPTION

+ +

Removes the given file path as "locked" on the Git LFS server. Files must exist +and have a clean git status before they can be unlocked. The --force flag will +skip these checks.

+ +

OPTIONS

+ +
+
+-r name --remote=name +
+
Specify the Git LFS server to use. Ignored if the lfs.url config key is set.
+
+-f --force +
+
Tells the server to remove the lock, even if it's owned by another user.
+
+-i <id> --id=<id> +
+
Specifies a lock by its ID instead of path.
+
--json
+
Writes lock info as JSON to STDOUT if the command exits successfully. Intended +for interoperation with external tools. If the command returns with a non-zero +exit code, plain text messages will be sent to STDERR.
+
+ +
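A couple of illustrative invocations (the path and lock ID are placeholders):

git lfs unlock images/foo.psd
git lfs unlock --id=123 --force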

SEE ALSO

+ +

git-lfs-lock(1), git-lfs-locks(1).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html b/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html new file mode 100644 index 0000000000000000000000000000000000000000..54ab07593f947a592123bdc15cca8b3f257d5019 --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html @@ -0,0 +1,108 @@ + + + + + + git-lfs-untrack(1) - Remove Git LFS paths from Git Attributes + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-untrack - Remove Git LFS paths from Git Attributes +

+

SYNOPSIS

+ +

git lfs untrack path...

+ +

DESCRIPTION

+ +

Stop tracking the given path(s) through Git LFS. The path argument +can be a glob pattern or a file path.

+ +

EXAMPLES

+ +
+
Configure Git LFS to stop tracking GIF files: + +

git lfs untrack "*.gif"

+
+
+
+ +

SEE ALSO

+ +

git-lfs-track(1), git-lfs-install(1), gitattributes(5).

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs-update.1.html b/git-lfs-3.2.0/man/html/git-lfs-update.1.html new file mode 100644 index 0000000000000000000000000000000000000000..f66d524154bfb72b4da8b82ccc903952502b723a --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs-update.1.html @@ -0,0 +1,122 @@ + + + + + + git-lfs-update(1) - Update Git hooks + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs-update - Update Git hooks +

+

SYNOPSIS

+ + + + + + + + +
git lfs update [--manual | --force]
+ +

DESCRIPTION

+ +

Updates the Git hooks used by Git LFS. Silently upgrades known hook contents. +If you have your own custom hooks you may need to use one of the extended +options below.

+ +

OPTIONS

+ +
  • --manual -m
    Print instructions for manually updating your hooks to include git-lfs
    functionality. Use this option if git lfs update fails because of existing
    hooks and you want to retain their functionality.

  • --force -f
    Forcibly overwrite any existing hooks with git-lfs hooks. Use this option
    if git lfs update fails because of existing hooks but you don't care
    about their current contents.
+ +
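For example, to print manual update instructions, or to overwrite existing hooks outright:

git lfs update --manual
git lfs update --force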

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/html/git-lfs.1.html b/git-lfs-3.2.0/man/html/git-lfs.1.html new file mode 100644 index 0000000000000000000000000000000000000000..6e3a7f9fb16a3be861a19b18367699511049afae --- /dev/null +++ b/git-lfs-3.2.0/man/html/git-lfs.1.html @@ -0,0 +1,220 @@ + + + + + + git-lfs(1) - Work with large files in Git repositories + + + + +
+ + + +
+ + + +

NAME

+

+ git-lfs - Work with large files in Git repositories +

+

SYNOPSIS

+ +

git lfs command [args]

+ +

DESCRIPTION

+ +

Git LFS is a system for managing and versioning large files in +association with a Git repository. Instead of storing the large files +within the Git repository as blobs, Git LFS stores special "pointer +files" in the repository, while storing the actual file contents on a +Git LFS server. The contents of the large file are downloaded +automatically when needed, for example when a Git branch containing +the large file is checked out.

+ +

Git LFS works by using a "smudge" filter to look up the large file +contents based on the pointer file, and a "clean" filter to create a +new version of the pointer file when the large file's contents change. +It also uses a pre-push hook to upload the large file contents to +the Git LFS server whenever a commit containing a new large file +version is about to be pushed to the corresponding Git server.

+ +

COMMANDS

+ +

Like Git, Git LFS commands are separated into high level ("porcelain") +commands and low level ("plumbing") commands.

+ +

High level porcelain commands

+ +
+
git-lfs-checkout(1)
+
Populate working copy with real content from Git LFS files.
+
git-lfs-dedup(1)
+
De-duplicate Git LFS files.
+
git-lfs-env(1)
+
Display the Git LFS environment.
+
git-lfs-ext(1)
+
Display Git LFS extension details.
+
git-lfs-fetch(1)
+
Download Git LFS files from a remote.
+
git-lfs-fsck(1)
+
Check Git LFS files for consistency.
+
git-lfs-install(1)
+
Install Git LFS configuration.
+
git-lfs-lock(1)
+
Set a file as "locked" on the Git LFS server.
+
git-lfs-locks(1)
+
List currently "locked" files from the Git LFS server.
+
git-lfs-logs(1)
+
Show errors from the Git LFS command.
+
git-lfs-ls-files(1)
+
Show information about Git LFS files in the index and working tree.
+
git-lfs-migrate(1)
+
Migrate history to or from Git LFS
+
git-lfs-prune(1)
+
Delete old Git LFS files from local storage
+
git-lfs-pull(1)
+
Fetch Git LFS changes from the remote & checkout any required working tree + files.
+
git-lfs-push(1)
+
Push queued large files to the Git LFS endpoint.
+
git-lfs-status(1)
+
Show the status of Git LFS files in the working tree.
+
git-lfs-track(1)
+
View or add Git LFS paths to Git attributes.
+
git-lfs-uninstall(1)
+
Uninstall Git LFS by removing hooks and smudge/clean filter configuration.
+
git-lfs-unlock(1)
+
Remove "locked" setting for a file on the Git LFS server.
+
git-lfs-untrack(1)
+
Remove Git LFS paths from Git Attributes.
+
git-lfs-update(1)
+
Update Git hooks for the current Git repository.
+
git-lfs-version(1)
+
Report the version number.
+
+ +

Low level plumbing commands

+ +
+
git-lfs-clean(1)
+
Git clean filter that converts large files to pointers.
+
git-lfs-filter-process(1)
+
Git process filter that converts between large files and pointers.
+
git-lfs-merge-driver(1)
+
Merge text-based LFS files
+
git-lfs-pointer(1)
+
Build and compare pointers.
+
git-lfs-post-checkout(1)
+
Git post-checkout hook implementation.
+
git-lfs-post-commit(1)
+
Git post-commit hook implementation.
+
git-lfs-post-merge(1)
+
Git post-merge hook implementation.
+
git-lfs-pre-push(1)
+
Git pre-push hook implementation.
+
git-lfs-smudge(1)
+
Git smudge filter that converts pointer in blobs to the actual content.
+
git-lfs-standalone-file(1)
+
Git LFS standalone transfer adapter for file URLs (local paths).
+
+ +

EXAMPLES

+ +

To get started with Git LFS, the following commands can be used.

+ +
  1. Set up Git LFS on your system. You only have to do this once per
     repository per machine:

     git lfs install

  2. Choose the type of files you want to track, for example all ISO
     images, with git-lfs-track(1):

     git lfs track "*.iso"

  3. The above stores this information in gitattributes(5) files, so
     that file needs to be added to the repository:

     git add .gitattributes

  4. Commit, push and work with the files normally:

     git add file.iso
     git commit -m "Add disk image"
     git push
+ +
+ +
+ + diff --git a/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 b/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 new file mode 100644 index 0000000000000000000000000000000000000000..30291fc1ca608cecc75c233afb3dd715d8954e8d --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 @@ -0,0 +1,50 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CHECKOUT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-checkout\fR \- Update working copy with file content if available +.SH "SYNOPSIS" +\fBgit lfs checkout\fR [\fIglob\-pattern\fR\|\.\|\.\|\.] +.br +\fBgit lfs checkout\fR \-\-to \fIfile\fR {\-\-base|\-\-ours|\-\-theirs} \fIconflict\-obj\-path\fR +.SH "DESCRIPTION" +Try to ensure that the working copy contains file content for Git LFS objects for the current ref, if the object data is available\. Does not download any content; see git\-lfs\-fetch(1) for that\. +.P +Checkout scans the current ref for all LFS objects that would be required, then where a file is either missing in the working copy, or contains placeholder pointer content with the same SHA, the real file content is written, provided we have it in the local store\. Modified files are never overwritten\. +.P +One or more \fIglob\-pattern\fRs may be provided as arguments to restrict the set of files that are updated\. Glob patterns are matched as per the format described in gitignore(5)\. +.P +When used with \fB\-\-to\fR and the working tree is in a conflicted state due to a merge, this option checks out one of the three stages a conflicting Git LFS object into a separate file (which can be outside of the work tree)\. This can make using diff tools to inspect and resolve merges easier\. A single Git LFS object\'s file path must be provided in \fIconflict\-obj\-path\fR\. +.SH "OPTIONS" +.TP +\fB\-\-base\fR +Check out the merge base of the specified file\. +.TP +\fB\-\-ours\fR +Check out our side (that of the current branch) of the conflict for the specified file\. +.TP +\fB\-\-theirs\fR +Check out their side (that of the other branch) of the conflict for the specified file\. +.TP +\fB\-\-to\fR \fIpath\fR +If the working tree is in a conflicted state, check out the portion of the conflict specified by \fB\-\-base\fR, \fB\-\-ours\fR, or \fB\-\-theirs\fR to the given path\. +.SH "EXAMPLES" +.TP +Checkout all files that are missing or placeholders: + +.P +\fB$ git lfs checkout\fR +.TP +Checkout a specific couple of files: + +.P +\fB$ git lfs checkout path/to/file1\.png path/to\.file2\.png\fR +.TP +Checkout a path with a merge conflict into separate files: + +.P +``` # Attempt merge with a branch that has a merge conflict $ git merge conflicting\-branch CONFLICT (content): Merge conflict in path/to/conflicting/file\.dat$ git lfs checkout ours\.dat \-\-ours path/to/conflicting/file\.dat $ git lfs checkout theirs\.dat \-\-theirs path/to/conflicting/file\.dat# then resolve conflict (e\.g\., by choosing one version over # the other, or creating a new version)$ rm ours\.dat theirs\.dat $ git add path/to/conflicting/file\.dat $ git merge \-\-continue ``` +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-pull(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/git-lfs-3.2.0/man/man1/git-lfs-clean.1 b/git-lfs-3.2.0/man/man1/git-lfs-clean.1 new file mode 100644 index 0000000000000000000000000000000000000000..0b1cfdcd8e98342a7c426afd51b2e16dc3a312bb --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-clean.1 @@ -0,0 +1,17 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CLEAN" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-clean\fR \- Git clean filter that converts large files to pointers +.SH "SYNOPSIS" +\fBgit lfs clean\fR \fIpath\fR +.SH "DESCRIPTION" +Read the contents of a large file from standard input, and write a Git LFS pointer file for that file to standard output\. +.P +Clean is typically run by Git\'s clean filter, configured by the repository\'s Git attributes\. +.P +Clean is not part of the user\-facing Git plumbing commands\. To preview the pointer of a large file as it would be generated, see the git\-lfs\-pointer(1) command\. +.SH "SEE ALSO" +git\-lfs\-install(1), git\-lfs\-push(1), git\-lfs\-pointer(1), gitattributes(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-clone.1 b/git-lfs-3.2.0/man/man1/git-lfs-clone.1 new file mode 100644 index 0000000000000000000000000000000000000000..494c75ea395e7605969a46742614e5fe38927fa9 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-clone.1 @@ -0,0 +1,34 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CLONE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-clone\fR \- Efficiently clone a LFS\-enabled repository +.SH "SYNOPSIS" +\fBgit lfs clone\fR [git clone options] \fIrepository\fR [\fIdirectory\fR] +.SH "DESCRIPTION" +Clone an LFS enabled Git repository more efficiently by disabling LFS during the git clone, then performing a \'git lfs pull\' directly afterwards\. +.P +\'git lfs clone\' also installs all of the repo\-level hooks (\.git/hooks) that LFS requires to operate\. If \fB\-\-separate\-git\-dir\fR is given, the hooks will be installed there\. +.P +This is faster than a regular \'git clone\' because that will download LFS content using the smudge filter, which is executed individually per file in the working copy\. This is relatively inefficient compared to the batch mode and parallel downloads performed by \'git lfs pull\'\. +.SH "OPTIONS" +All options supported by \'git clone\' +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-\-skip\-repo\fR +Skip installing repo\-level hooks (\.git/hooks) that LFS requires\. Disabled by default\. +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. +.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. 
Setting either option to an empty string clears the value\. +.SH "SEE ALSO" +git\-clone(1), git\-lfs\-pull(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 b/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 new file mode 100644 index 0000000000000000000000000000000000000000..6358c0aa9d665e7a93b1a1b0ff252e4f472f15d1 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 @@ -0,0 +1,15 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-DEDUP" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-dedup\fR \- Deduplicate Git LFS files +.SH "SYNOPSIS" +\fBgit lfs dedup\fR +.SH "DESCRIPTION" +Deduplicates storage by re\-creating working tree files as clones of the files in the Git LFS storage directory using the operating system\'s copy\-on\-write file creation functionality\. +.P +If the operating system or file system don\'t support copy\-on\-write file creation, this command exits unsuccessfully\. +.P +This command will also exit without success if any Git LFS extensions are configured, as these will typically be used to alter the file contents before they are written to the Git LFS storage directory, and therefore the working tree files should not be copy\-on\-write clones of the LFS object files\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-env.1 b/git-lfs-3.2.0/man/man1/git-lfs-env.1 new file mode 100644 index 0000000000000000000000000000000000000000..084a669ef873ca78a9d25f9dfe800cf980b7622c --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-env.1 @@ -0,0 +1,11 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-ENV" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-env\fR \- Display the Git LFS environment +.SH "SYNOPSIS" +\fBgit lfs env\fR +.SH "DESCRIPTION" +Display the current Git LFS environment\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-ext.1 b/git-lfs-3.2.0/man/man1/git-lfs-ext.1 new file mode 100644 index 0000000000000000000000000000000000000000..1b16271c0a79d674abc5fefab38b646022b9ae88 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-ext.1 @@ -0,0 +1,21 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-EXT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-ext\fR \- View extension details +.SH "SYNOPSIS" +\fBgit lfs ext list\fR [\fIname\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Git LFS extensions enable the manipulation of files streams during smudge and clean\. +.SH "EXAMPLES" +.IP "\[ci]" 4 +List details for all extensions +.IP +\fBgit lfs ext\fR \fBgit lfs ext list\fR +.IP "\[ci]" 4 +List details for the specified extensions +.IP +\fBgit lfs ext list \'foo\' \'bar\'\fR +.IP "" 0 +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 b/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 new file mode 100644 index 0000000000000000000000000000000000000000..490dbc8658520c488284bf1b0502696797d6a8d0 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 @@ -0,0 +1,100 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FETCH" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-fetch\fR \- Download all Git LFS files for a given ref +.SH "SYNOPSIS" +\fBgit lfs fetch\fR [options] [\fIremote\fR [\fIref\fR\|\.\|\.\|\.]] +.SH "DESCRIPTION" +Download Git LFS objects at the given refs from the specified remote\. 
See \fIDEFAULT REMOTE\fR and \fIDEFAULT REFS\fR for what happens if you don\'t specify\. +.P +This does not update the working copy\. +.SH "OPTIONS" +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Specify lfs\.fetchinclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Specify lfs\.fetchexclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-\-recent\fR +Download objects referenced by recent branches & commits in addition to those that would otherwise be downloaded\. See \fIRECENT CHANGES\fR +.TP +\fB\-\-all\fR +Download all objects that are referenced by any commit reachable from the refs provided as arguments\. If no refs are provided, then all refs are fetched\. This is primarily for backup and migration purposes\. Cannot be combined with \-\-recent or \-\-include/\-\-exclude\. Ignores any globally configured include and exclude paths to ensure that all objects are downloaded\. +.TP +\fB\-\-prune\fR \fB\-p\fR +Prune old and unreferenced objects after fetching, equivalent to running \fBgit lfs prune\fR afterwards\. See git\-lfs\-prune(1) for more details\. +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. +.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. Setting either option to an empty string clears the value\. +.SS "Examples" +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "textures,images/foo*"\fR +.IP +This will only fetch objects referenced in paths in the textures folder, and files called foo* in the images folder +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "*\.jpg,*\.png,*\.tga"\fR +.IP +Only fetch JPG/PNG/TGA files, wherever they are in the repository +.IP "\[ci]" 4 +\fBgit config lfs\.fetchexclude "media/reallybigfiles"\fR +.IP +Don\'t fetch any LFS objects referenced in the folder media/reallybigfiles, but fetch everything else +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "media"\fR +.br +\fBgit config lfs\.fetchexclude "media/excessive"\fR +.IP +Only fetch LFS objects in the \'media\' folder, but exclude those in one of its subfolders\. +.IP "" 0 +.SH "DEFAULT REMOTE" +Without arguments, fetch downloads from the default remote\. The default remote is the same as for \fBgit fetch\fR, i\.e\. based on the remote branch you\'re tracking first, or origin otherwise\. +.SH "DEFAULT REFS" +If no refs are given as arguments, the currently checked out ref is used\. In addition, if enabled, recently changed refs and commits are also included\. See \fIRECENT CHANGES\fR for details\. 
+.SH "RECENT CHANGES" +If the \fB\-\-recent\fR option is specified, or if the gitconfig option \fBlfs\.fetchrecentalways\fR is true, then after the current ref (or those in the arguments) is fetched, we also search for \'recent\' changes to fetch objects for, so that it\'s more convenient to checkout or diff those commits without incurring further downloads\. +.P +What changes are considered \'recent\' is based on a number of gitconfig options: +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR If non\-zero, includes branches which have commits within N days of the current date\. Only local refs are included unless lfs\.fetchrecentremoterefs is true\. The default is 7 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentremoterefs\fR If true, fetches remote refs (for the remote you\'re fetching) as well as local refs in the recent window\. This is useful to fetch objects for remote branches you might want to check out later\. The default is true; if you set this to false, fetching for those branches will only occur when you either check them out (losing the advantage of fetch \-\-recent), or create a tracking local branch separately then fetch again\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentcommitsdays\fR In addition to fetching at branches, also fetches changes made within N days of the latest commit on the branch\. This is useful if you\'re often reviewing recent changes\. The default is 0 (no previous changes)\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentalways\fR Always operate as if \-\-recent was provided on the command line\. +.IP "" 0 +.SH "EXAMPLES" +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref from default remote +.IP +\fBgit lfs fetch\fR +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref AND recent changes from default remote +.IP +\fBgit lfs fetch \-\-recent\fR +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref from a secondary remote \'upstream\' +.IP +\fBgit lfs fetch upstream\fR +.IP "\[ci]" 4 +Fetch all the LFS objects from the default remote that are referenced by any commit in the \fBmain\fR and \fBdevelop\fR branches +.IP +\fBgit lfs fetch \-\-all origin main develop\fR +.IP "\[ci]" 4 +Fetch the LFS objects for a branch from origin +.IP +\fBgit lfs fetch origin mybranch\fR +.IP "\[ci]" 4 +Fetch the LFS objects for 2 branches and a commit from origin +.IP +\fBgit lfs fetch origin main mybranch e445b45c1c9c6282614f201b62778e4c0688b5c8\fR +.IP "" 0 +.SH "SEE ALSO" +git\-lfs\-checkout(1), git\-lfs\-pull(1), git\-lfs\-prune(1), gitconfig(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 b/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 new file mode 100644 index 0000000000000000000000000000000000000000..537b01830dc7d170df1ca739c9ede9f8d364bc8f --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FILTER\-PROCESS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-filter\-process\fR \- Git filter process that converts between pointer and actual content +.SH "SYNOPSIS" +\fBgit lfs filter\-process\fR \fBgit lfs filter\-process \-\-skip\fR +.SH "DESCRIPTION" +Implement the Git process filter API, exchanging handshake messages and then accepting and responding to requests to either clean or smudge a file\. +.P +filter\-process is always run by Git\'s filter process, and is configured by the repository\'s Git attributes\. 
+.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. Git LFS pointer files that are not replaced with the contents of their corresponding object files are simply copied to standard output without change\. +.P +The filter process uses Git\'s pkt\-line protocol to communicate, and is documented in detail in gitattributes(5)\. +.SH "OPTIONS" +Without any options, filter\-process accepts and responds to requests normally\. +.TP +\fB\-\-skip\fR +Skip automatic downloading of objects on clone or pull\. +.TP +\fBGIT_LFS_SKIP_SMUDGE\fR +Disables the smudging process\. For more, see: git\-lfs\-config(5)\. +.SH "SEE ALSO" +git\-lfs\-clean(1), git\-lfs\-install(1), git\-lfs\-smudge(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 b/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 new file mode 100644 index 0000000000000000000000000000000000000000..04992190a2e95d751a8d9f7de4666dba2e99b93b --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 @@ -0,0 +1,28 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FSCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-fsck\fR \- Check GIT LFS files for consistency +.SH "SYNOPSIS" +\fBgit lfs fsck\fR [options] [revisions] +.SH "DESCRIPTION" +Checks all Git LFS files in the current HEAD for consistency\. +.P +Corrupted files are moved to "\.git/lfs/bad"\. +.P +The revisions may be specified as either a single committish, in which case only that commit is inspected; specified as a range of the form \fBA\.\.B\fR (and only this form), in which case that range is inspected; or omitted entirely, in which case HEAD (and, for \-\-objects, the index) is examined\. +.P +The default is to perform all checks\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set \fBlfs\.fetchexclude\fR to a comma\-separated list of paths\. If \fBlfs\.fetchexclude\fR is defined, then any Git LFS files whose paths match one in that list will not be checked for consistency\. Paths are matched using wildcard matching as per gitignore(5)\. +.SH "OPTIONS" +.TP +\fB\-\-objects\fR +Check that each object in HEAD matches its expected hash and that each object exists on disk\. +.TP +\fB\-\-pointers\fR +Check that each pointer is canonical and that each file which should be stored as a Git LFS file is so stored\. +.SH "SEE ALSO" +git\-lfs\-ls\-files(1), git\-lfs\-status(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-install.1 b/git-lfs-3.2.0/man/man1/git-lfs-install.1 new file mode 100644 index 0000000000000000000000000000000000000000..eb70f36efe9034a73df5b396b5443f06efc29b21 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-install.1 @@ -0,0 +1,41 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-INSTALL" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-install\fR \- Install Git LFS configuration\. 
+.SH "SYNOPSIS" +\fBgit lfs install\fR \fIoptions\fR +.SH "DESCRIPTION" +Perform the following actions to ensure that Git LFS is setup properly: +.IP "\[ci]" 4 +Set up the clean and smudge filters under the name "lfs" in the global Git config\. +.IP "\[ci]" 4 +Install a pre\-push hook to run git\-lfs\-pre\-push(1) for the current repository, if run from inside one\. If "core\.hooksPath" is configured in any Git configuration (and supported, i\.e\., the installed Git version is at least 2\.9\.0), then the pre\-push hook will be installed to that directory instead\. +.IP "" 0 +.SH "OPTIONS" +Without any options, \fBgit lfs install\fR will only setup the "lfs" smudge and clean filters if they are not already set\. +.TP +\fB\-\-force\fR +Sets the "lfs" smudge and clean filters, overwriting existing values\. +.TP +\fB\-\-local\fR +Sets the "lfs" smudge and clean filters in the local repository\'s git config, instead of the global git config (~/\.gitconfig)\. +.TP +\fB\-\-worktree\fR +Sets the "lfs" smudge and clean filters in the current working tree\'s git config, instead of the global git config (~/\.gitconfig) or local repository\'s git config ($GIT_DIR/config)\. If multiple working trees are in use, the Git config extension \fBworktreeConfig\fR must be enabled to use this option\. If only one working tree is in use, \fB\-\-worktree\fR has the same effect as \fB\-\-local\fR\. This option is only available if the installed Git version is at least 2\.20\.0 and therefore supports the "worktreeConfig" extension\. +.TP +\fB\-\-manual\fR +Print instructions for manually updating your hooks to include git\-lfs functionality\. Use this option if \fBgit lfs install\fR fails because of existing hooks and you want to retain their functionality\. +.TP +\fB\-\-system\fR +Sets the "lfs" smudge and clean filters in the system git config, e\.g\. /etc/gitconfig instead of the global git config (~/\.gitconfig)\. +.TP +\fB\-\-skip\-smudge\fR +Skips automatic downloading of objects on clone or pull\. This requires a manual "git lfs pull" every time a new commit is checked out on your repository\. +.TP +\fB\-\-skip\-repo\fR +Skips setup of the local repo; use if you want to install the global lfs filters but not make changes to the current repo\. +.SH "SEE ALSO" +git\-lfs\-uninstall(1), git\-worktree(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-lock.1 b/git-lfs-3.2.0/man/man1/git-lfs-lock.1 new file mode 100644 index 0000000000000000000000000000000000000000..33ac16393211a7a0a64a4899eab1fbb4f19922fd --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-lock.1 @@ -0,0 +1,22 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-lock\fR \- Set a file as "locked" on the Git LFS server +.SH "SYNOPSIS" +\fBgit lfs lock\fR \fIoptions\fR \fIpath\fR +.SH "DESCRIPTION" +Sets the given file path as "locked" against the Git LFS server, with the intention of blocking attempts by other users to update the given path\. Locking a file requires the file to exist in the working copy\. +.P +Once locked, LFS will verify that Git pushes do not modify files locked by other users\. See the description of the \fBlfs\.\.locksverify\fR config key in git\-lfs\-config(5) for details\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. 
+.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-unlock(1), git\-lfs\-locks(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-locks.1 b/git-lfs-3.2.0/man/man1/git-lfs-locks.1 new file mode 100644 index 0000000000000000000000000000000000000000..3d651a0bc3ea7c6561e60fc5c3744ed686639b2c --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-locks.1 @@ -0,0 +1,38 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOCKS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-locks\fR \- Lists currently locked files from the Git LFS server\. +.SH "SYNOPSIS" +\fBgit lfs locks\fR \fIoptions\fR +.SH "DESCRIPTION" +Lists current locks from the Git LFS server\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. +.TP +\fB\-i \fR \fB\-\-id=\fR +Specifies a lock by its ID\. Returns a single result\. +.TP +\fB\-p \fR \fB\-\-path=\fR +Specifies a lock by its path\. Returns a single result\. +.TP +\fB\-\-local\fR +Lists only our own locks which are cached locally\. Skips a remote call\. +.TP +\fB\-\-cached\fR +Lists cached locks from the last remote call\. Contrary to \-\-local, this will include locks of other users as well\. This option is intended to display the last known locks in case you are offline\. There is no guarantee that locks on the server have not changed in the meanwhile\. +.TP +\fB\-\-verify\fR +Verifies the lock owner on the server and marks our own locks by \'O\'\. Own locks are actually held by us and corresponding files can be updated for the next push\. All other locks are held by someone else\. Contrary to \-\-local, this option will also detect locks which are held by us despite no local lock information being available (e\.g\. because the file had been locked from a different clone); it will also detect \'broken\' locks (e\.g\. if someone else has forcefully unlocked our files)\. +.TP +\fB\-l \fR \fB\-\-limit=\fR +Specifies number of results to return\. +.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-lock(1), git\-lfs\-unlock(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-logs.1 b/git-lfs-3.2.0/man/man1/git-lfs-logs.1 new file mode 100644 index 0000000000000000000000000000000000000000..de5e867b5ad1dfbf065290693c71c365946b1f47 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-logs.1 @@ -0,0 +1,30 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOGS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-logs\fR \- Show errors from the git\-lfs command +.SH "SYNOPSIS" +\fBgit lfs logs\fR +.br +\fBgit lfs logs\fR \fIfile\fR +.br +\fBgit lfs logs clear\fR +.br +\fBgit lfs logs boomtown\fR +.br +.SH "DESCRIPTION" +Display errors from the git\-lfs command\. Any time it crashes, the details are saved to "\.git/lfs/logs"\. +.SH "COMMANDS" +.TP +\fBclear\fR +Clears all of the existing logged errors\. +.TP +\fBboomtown\fR +Triggers a dummy exception\. 
+.SH "OPTIONS" +Without any options, \fBgit lfs logs\fR simply shows the list of error logs\. +.TP +\fIfile\fR +Shows the specified error log\. Use "last" to show the most recent error\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 b/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 new file mode 100644 index 0000000000000000000000000000000000000000..e94ee72c0d366b190014106058566dcaa211f331 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 @@ -0,0 +1,42 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LS\-FILES" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-ls\-files\fR \- Show information about Git LFS files in the index and working tree +.SH "SYNOPSIS" +\fBgit lfs ls\-files\fR [\fIref\fR] +.br +\fBgit lfs ls\-files\fR \fIref\fR \fIref\fR +.SH "DESCRIPTION" +Display paths of Git LFS files that are found in the tree at the given reference\. If no reference is given, scan the currently checked\-out branch\. If two references are given, the LFS files that are modified between the two references are shown; deletions are not listed\. +.P +An asterisk (*) after the OID indicates a full object, a minus (\-) indicates an LFS pointer\. +.SH "OPTIONS" +.TP +\fB\-l\fR \fB\-\-long\fR +Show the entire 64 character OID, instead of just first 10\. +.TP +\fB\-s\fR \fB\-\-size\fR +Show the size of the LFS object between parenthesis at the end of a line\. +.TP +\fB\-d\fR \fB\-\-debug\fR +Show as much information as possible about a LFS file\. This is intended for manual inspection; the exact format may change at any time\. +.TP +\fB\-a\fR \fB\-\-all\fR +Inspects the full history of the repository, not the current HEAD (or other provided reference)\. This will include previous versions of LFS objects that are no longer found in the current tree\. +.TP +\fB\-\-deleted\fR +Shows the full history of the given reference, including objects that have been deleted\. +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Include paths matching only these patterns; see [FETCH SETTINGS]\. +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Exclude paths matching any of these patterns; see [FETCH SETTINGS]\. +.TP +\fB\-n\fR \fB\-\-name\-only\fR +Show only the lfs tracked file names\. ## SEE ALSO +.P +git\-lfs\-status(1), git\-lfs\-config(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 b/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 new file mode 100644 index 0000000000000000000000000000000000000000..33e9acade6891be5eb7daba0080ccc6ca7a7b3cf --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 @@ -0,0 +1,58 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-MERGE\-DRIVER" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-merge\-driver\fR \- Merge text\-based LFS files +.SH "SYNOPSIS" +\fBgit lfs merge\-driver\fR \fIoptions\fR +.SH "DESCRIPTION" +Merge text files stored in Git LFS using the default Git merge machinery, or a custom merge driver if specified\. Note that this, in general, does not support partial renames or copies because Git does not support them in this case\. +.P +This program is intended to be invoked automatically by Git and not by users manually\. See \fICONFIGURATION\fR for details on the configuration required for that\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-ancestor\fR \fIpath\fR Specify the file containing the ancestor revision\. 
+.IP "\[ci]" 4 +\fB\-\-current\fR \fIpath\fR Specify the file containing the current revision\. +.IP "\[ci]" 4 +\fB\-\-marker\-size\fR \fInum\fR Specify the conflict marker size as an integer\. +.IP "\[ci]" 4 +\fB\-\-other\fR \fIpath\fR Specify the file containing the other revision\. +.IP "\[ci]" 4 +\fB\-\-program\fR \fIprogram\fR Specify a command, which is passed to the shell after substitution, that performs the actual merge\. If this is not specified, \fBgit merge\-file\fR is invoked with appropriate arguments to perform the merge of the file\. +.IP +See \fICONFIGURATION\fR for the sequences which are substituted here\. +.IP "" 0 +.SH "CONFIGURATION" +Git allows the use of a custom merge driver for files based on the \fBmerge\fR attribute set in \fB\.gitattributes\fR\. By default, when using \fBgit lfs track\fR, this value is set to \fBlfs\fR\. +.P +Because Git LFS can be used to store both text and binary files and it isn\'t always clear which behavior should be used, Git LFS does not enable this merge driver by default\. However, if you know that some or all of your files are text files, then you can set the \fBmerge\fR attribute for those files to \fBlfs\-text\fR and use \fBgit config\fR to set the merge driver like so: +.P +\fBconsole $ git config merge\.lfs\-text\.driver \'git lfs merge\-driver \-\-ancestor %O \-\-current %A \-\-other %B \-\-marker\-size %L \-\-output %A\'\fR +.P +This tells Git to invoke the custom Git LFS merge driver, which in turn uses Git\'s merge machinery, to merge files where the \fBmerge\fR attribute is set to \fBlfs\-text\fR\. Note that \fBlfs\-text\fR here is an example and any syntactically valid value can be used\. +.P +If you are using a special type of file that needs rules different from Git\'s standard merge machinery, you can also specify the \fB\-\-program\fR option, which is passed to \fBsh\fR after substituting its own percent\-encoded escapes: +.IP "\[ci]" 4 +\fB%A\fR: the current version +.IP "\[ci]" 4 +\fB%B\fR: the other version +.IP "\[ci]" 4 +\fB%D\fR: the destination version +.IP "\[ci]" 4 +\fB%O\fR: the ancestor version +.IP "\[ci]" 4 +\fB%L\fR: the conflict marker size +.IP "" 0 +.P +Note that the percent sign must typically be doubled to prevent Git from substituting its own values here\. Therefore, specifying the default behavior explicitly looks like this: +.P +\fBconsole $ git config merge\.lfs\-text\.driver \e \'git lfs merge\-driver \-\-ancestor %O \-\-current %A \-\-other %B \-\-marker\-size %L \-\-output %A \-\-program \'\e\'\'git merge\-file \-\-stdout \-\-marker\-size=%%L %%A %%O %%B >%%D\'\e\'\'\'\fR +.P +The exit status from the custom command should be zero on success or non\-zero on conflicts or other failure\. +.P +Note that if no merge driver is specified for the value of the \fBmerge\fR attribute (as is the case by default with \fBmerge=lfs\fR), then the default Git merge strategy is used\. For LFS files, this means that Git will try to merge the pointer files, which usually is not useful\. +.SH "SEE ALSO" +git\-merge(1), git\-merge\-file(1), gitattributes(5) +.P +Part of the git\-lfs(1) suite\. 
diff --git a/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 b/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 new file mode 100644 index 0000000000000000000000000000000000000000..87be8e2b8822d511b649400b2927ed8eca7f6187 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 @@ -0,0 +1,191 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-MIGRATE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-migrate\fR \- Migrate history to or from Git LFS +.SH "SYNOPSIS" +\fBgit lfs migrate\fR \fImode\fR [options] [\-\-] [branch \|\.\|\.\|\.] +.SH "DESCRIPTION" +Convert files in a Git repository to or from Git LFS pointers, or summarize Git file sizes by file type\. The \fBimport\fR mode converts Git files (i\.e\., blobs) to Git LFS, while the \fBexport\fR mode does the reverse, and the \fBinfo\fR mode provides an informational summary which may be useful in deciding which files to import or export\. +.P +In all modes, by default \fBgit lfs migrate\fR operates only on the currently checked\-out branch, and only on files (of any size and type) added in commits which do not exist on any remote\. Multiple options are available to override these defaults\. +.P +When converting files to or from Git LFS, the \fBgit lfs migrate\fR command will only make changes to your local repository and working copy, never any remotes\. This is intentional as the \fBimport\fR and \fBexport\fR modes are generally "destructive" in the sense that they rewrite your Git history, changing commits and generating new commit SHAs\. (The exception is the "no\-rewrite" \fBimport\fR sub\-mode; see \fIIMPORT WITHOUT REWRITING HISTORY\fR for details\.) +.P +You should therefore always first commit or stash any uncommitted work before using the \fBimport\fR or \fBexport\fR modes, and then validate the result of the migration before pushing the changes to your remotes, for instance by running the \fBinfo\fR mode and by examining your rewritten commit history\. +.P +Once you are satisfied with the changes, you will need to force\-push the new Git history of any rewritten branches to all your remotes\. This is a step which should be taken with care, since you will be altering the Git history on your remotes\. +.P +To examine or modify files in branches other than the currently checked\-out one, branch refs may be specified directly, or provided in one or more \fB\-\-include\-ref\fR options\. They may also be excluded by prefixing them with \fB^\fR or providing them in \fB\-\-exclude\-ref\fR options\. Use the \fB\-\-everything\fR option to specify that all refs should be examined, including all remote refs\. See \fIINCLUDE AND EXCLUDE REFERENCES\fR for details\. +.P +For the \fBinfo\fR and \fBimport\fR modes, all file types are considered by default; while useful in the \fBinfo\fR mode, this is often not desirable when importing, so either filename patterns (pathspecs) or the \fB\-\-fixup\fR option should normally be specified in that case\. (At least one include pathspec is required for the \fBexport\fR mode\.) Pathspecs may be defined using the \fB\-\-include\fR and \fB\-\-exclude\fR options (\fB\-I\fR and \fB\-X\fR for short), as described in \fIINCLUDE AND EXCLUDE\fR\. +.P +As typical Git LFS usage depends on tracking specific file types using filename patterns defined in \fB\.gitattributes\fR files, the \fBgit lfs migrate\fR command will examine, create, and modify \fB\.gitattributes\fR files as necessary\. 
The \fB\.gitattributes\fR files will always be assigned the default read/write permissions mode (i\.e\., without execute permissions)\. Any symbolic links with that name will cause the migration to halt prematurely\. +.P +The \fBimport\fR mode (see \fIIMPORT\fR) will convert Git objects of the file types specified (e\.g\., with \fB\-\-include\fR) to Git LFS pointers, and will add entries for those file types to \fB\.gitattributes\fR files, creating those files if they do not exist\. The result should be as if \fBgit lfs track\fR commands had been run at the points in your Git history corresponding to where each type of converted file first appears\. The exception is if the \fB\-\-fixup\fR option is given, in which case the \fBimport\fR mode will only examine any existing \fB\.gitattributes\fR files and then convert Git objects which should be tracked by Git LFS but are not yet\. +.P +The \fBexport\fR mode (see \fIEXPORT\fR) works as the reverse operation to the \fBimport\fR mode, converting any Git LFS pointers that match the file types specified with \fB\-\-include\fR, which must be given at least once\. Note that \fB\.gitattributes\fR entries will not be removed, nor will the files; instead, the \fBexport\fR mode inserts "do not track" entries similar to those created by the \fBgit lfs untrack\fR command\. The \fB\-\-remote\fR option is available in the \fBexport\fR mode to specify the remote from which Git LFS objects should be fetched if they do not exist in the local Git LFS object cache; if not provided, \fBorigin\fR is used by default\. +.P +The \fBinfo\fR mode (see \fIINFO\fR) summarizes by file type (i\.e\., by filename extension) the total number and size of files in a repository\. Note that like the other two modes, by default the \fBinfo\fR mode operates only on the currently checked\-out branch and only on commits which do not exist on any remote, so to get a summary of the entire repository across all branches, use the \fB\-\-everything\fR option\. If objects have already been converted to Git LFS pointers, then by default the size of the referenced objects is totaled and reported separately\. You may also choose to ignore them by using \fB\-\-pointers=ignore\fR or to treat the pointers as files by using \fB\-\-pointers=no\-follow\fR\. (The latter option is akin to how existing Git LFS pointers were handled by the \fBinfo\fR mode in prior versions of Git LFS)\. +.P +When using the \fB\-\-everything\fR option, take note that it means all refs (local and remote) will be considered, but not necessarily all file types\. The \fBimport\fR and \fBinfo\fR modes consider all file types by default, although the \fB\-\-include\fR and \fB\-\-exclude\fR options constrain this behavior\. Also note that after importing across all branches with the \fB\-\-everything\fR option (and then checking to ensure the results are satisfactory!) it may be convenient to update multiple branches on your remotes by using the \fB\-\-all\fR option to \fBgit push\fR\. +.P +Unless the \fB\-\-skip\-fetch\fR option is given, \fBgit lfs migrate\fR always begins by fetching updated lists of refs from all the remotes returned by \fBgit remote\fR, but as noted above, after making changes to your local Git history while converting objects, it will never automatically push those changes to your remotes\. +.SH "MODES" +.IP "\[ci]" 4 +\fBinfo\fR Show information about repository size\. See \fIINFO\fR\. +.IP "\[ci]" 4 +\fBimport\fR Convert Git objects to Git LFS pointers\. 
See \fIIMPORT\fR and \fIIMPORT WITHOUT REWRITING HISTORY\fR\.
+.IP "\[ci]" 4
+\fBexport\fR Convert Git LFS pointers to Git objects\. See \fIEXPORT\fR\.
+.IP "" 0
+.SH "OPTIONS"
+.TP
+\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR
+See \fIINCLUDE AND EXCLUDE\fR\.
+.TP
+\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR
+See \fIINCLUDE AND EXCLUDE\fR\.
+.TP
+\fB\-\-include\-ref\fR=\fIrefname\fR
+See \fIINCLUDE AND EXCLUDE REFERENCES\fR\.
+.TP
+\fB\-\-exclude\-ref\fR=\fIrefname\fR
+See \fIINCLUDE AND EXCLUDE REFERENCES\fR\.
+.TP
+\fB\-\-skip\-fetch\fR
+Assumes that the known set of remote references is complete, and should not be refreshed when determining the set of "un\-pushed" commits to migrate\. Has no effect when combined with \fB\-\-include\-ref\fR or \fB\-\-exclude\-ref\fR\.
+.TP
+\fB\-\-everything\fR
+See \fIINCLUDE AND EXCLUDE REFERENCES\fR\.
+.IP
+Note: Git refs are "case\-sensitive" on all platforms in "packed form" (see \fBgit\-pack\-refs(1)\fR)\. On "case\-insensitive" file systems, e\.g\. NTFS on Windows or default APFS on macOS, \fBgit\-lfs\-migrate(1)\fR would only migrate the first ref if two or more refs are equal except for upper/lower case letters\.
+.TP
+\fB\-\-yes\fR
+Assume a yes answer to any prompts, permitting noninteractive use\. Currently, the only such prompt is the one asking whether to overwrite (destroy) any working copy changes\. Thus, specifying this option may cause data loss if you are not careful\.
+.TP
+[branch \|\.\|\.\|\.]
+Migrate only the set of branches listed\. If not given, \fBgit\-lfs\-migrate(1)\fR will migrate the currently checked out branch\.
+.IP
+References beginning with \fB^\fR will be excluded, whereas branches that do not begin with \fB^\fR will be included\.
+.IP
+If any of \fB\-\-include\-ref\fR or \fB\-\-exclude\-ref\fR are given, the checked out branch will not be appended, but branches given explicitly will be appended\.
+.SS "INFO"
+The \fBinfo\fR mode summarizes the sizes of file objects present in the Git history\. It supports all the core \fBmigrate\fR options and these additional ones:
+.IP "\[ci]" 4
+\fB\-\-above=\fR Only count files whose individual filesize is above the given size\. \fBsize\fR may be specified as a number of bytes, or a number followed by a storage unit, e\.g\., "1b", "20 MB", "3 TiB", etc\.
+.IP
+If no file in a set of files sharing a common extension has an individual size above the given \fB\-\-above\fR threshold, then no entry for that set will be shown\.
+.IP "\[ci]" 4
+\fB\-\-top=\fR Only display the top \fBn\fR entries, ordered by how many total files match the given pathspec\. The default is to show only the top 5 entries\. When existing Git LFS objects are found, an extra, separate "LFS Objects" line is output in addition to the top \fBn\fR entries, unless the \fB\-\-pointers\fR option is used to change this behavior\.
+.IP "\[ci]" 4
+\fB\-\-unit=\fR Format the number of bytes in each entry as a quantity of the storage unit provided\. Valid units include \fBb\fR, \fBkib\fR, \fBmib\fR, \fBgib\fR, \fBtib\fR, and \fBpib\fR for IEC storage units, and \fBb\fR, \fBkb\fR, \fBmb\fR, \fBgb\fR, \fBtb\fR, and \fBpb\fR for SI storage units\.
+.IP
+If a \fB\-\-unit\fR is not specified, the largest unit that can fit the number of counted bytes as a whole number quantity is chosen\.
+.IP "\[ci]" 4
+\fB\-\-pointers=[follow|no\-follow|ignore]\fR Treat existing Git LFS pointers in the history according to one of three alternatives\.
In the default \fBfollow\fR case, if any pointers are found, an additional separate "LFS Objects" line item is output which summarizes the total number and size of the Git LFS objects referenced by pointers\. In the \fBignore\fR case, any pointers are simply ignored, while the \fBno\-follow\fR case replicates the behavior of the \fBinfo\fR mode in older Git LFS versions and treats any pointers it finds as if they were regular files, so the output totals only include the contents of the pointers, not the contents of the objects to which they refer\.
+.IP "\[ci]" 4
+\fB\-\-fixup\fR Infer \fB\-\-include\fR and \fB\-\-exclude\fR filters on a per\-commit basis based on the \fB\.gitattributes\fR files in a repository\. In practice, this option counts any filepaths which should be tracked by Git LFS according to the repository\'s \fB\.gitattributes\fR file(s), but aren\'t already pointers\. The \fB\.gitattributes\fR files are not reported, in contrast to the normal output of the \fBinfo\fR mode\. This option is incompatible with explicitly given \fB\-\-include\fR, \fB\-\-exclude\fR filters and with any \fB\-\-pointers\fR setting other than \fBignore\fR, hence \fB\-\-fixup\fR implies \fB\-\-pointers=ignore\fR if it is not explicitly set\.
+.IP "" 0
+.P
+The format of the output shows the filename pattern, the total size of the file objects (excluding those below the \fB\-\-above\fR threshold, if one was defined), and the ratio of the number of files above the threshold to the total number of files; this ratio is also shown as a percentage\. For example:
+.IP "" 4
+.nf
+*\.gif               93 MB   9480/10504 files(s)  90%
+*\.png               14 MB    1732/1877 files(s)  92%
+.fi
+.IP "" 0
+.P
+By default only the top five entries are shown, but \fB\-\-top\fR allows for more or fewer to be output as desired\.
+.SS "IMPORT"
+The \fBimport\fR mode migrates objects present in the Git history to pointer files tracked and stored with Git LFS\. It supports all the core \fBmigrate\fR options and these additional ones:
+.IP "\[ci]" 4
+\fB\-\-verbose\fR Print the commit oid and filename of migrated files to STDOUT\.
+.IP "\[ci]" 4
+\fB\-\-above=\fR Only migrate files whose individual filesize is above the given size\. \fBsize\fR may be specified as a number of bytes, or a number followed by a storage unit, e\.g\., "1b", "20 MB", "3 TiB", etc\. This option cannot be used with the \fB\-\-include\fR, \fB\-\-exclude\fR, and \fB\-\-fixup\fR options\.
+.IP "\[ci]" 4
+\fB\-\-object\-map=\fR Write to \fBpath\fR a file with the mapping of each rewritten commit\. The file format is CSV with this pattern: \fBOLD\-SHA\fR,\fBNEW\-SHA\fR
+.IP "\[ci]" 4
+\fB\-\-no\-rewrite\fR Migrate objects to Git LFS in a new commit without rewriting Git history\. Please note that when this option is used, the \fBmigrate import\fR command will expect a different argument list, specialized options will become available, and the core \fBmigrate\fR options will be ignored\. See \fIIMPORT WITHOUT REWRITING HISTORY\fR\.
+.IP "\[ci]" 4
+\fB\-\-fixup\fR Infer \fB\-\-include\fR and \fB\-\-exclude\fR filters on a per\-commit basis based on the \fB\.gitattributes\fR files in a repository\. In practice, this option imports any filepaths which should be tracked by Git LFS according to the repository\'s \fB\.gitattributes\fR file(s), but aren\'t already pointers\. This option is incompatible with explicitly given \fB\-\-include\fR, \fB\-\-exclude\fR filters\.
+.IP "" 0 +.P +If \fB\-\-no\-rewrite\fR is not provided and \fB\-\-include\fR or \fB\-\-exclude\fR (\fB\-I\fR, \fB\-X\fR, respectively) are given, the \fB\.gitattributes\fR will be modified to include any new filepath patterns as given by those flags\. +.P +If \fB\-\-no\-rewrite\fR is not provided and neither of those flags are given, the gitattributes will be incrementally modified to include new filepath extensions as they are rewritten in history\. +.SS "IMPORT WITHOUT REWRITING HISTORY" +The \fBimport\fR mode has a special sub\-mode enabled by the \fB\-\-no\-rewrite\fR flag\. This sub\-mode will migrate objects to pointers as in the base \fBimport\fR mode, but will do so in a new commit without rewriting Git history\. When using this sub\-mode, the base \fBmigrate\fR options, such as \fB\-\-include\-ref\fR, will be ignored, as will those for the base \fBimport\fR mode\. The \fBmigrate\fR command will also take a different argument list\. As a result of these changes, \fB\-\-no\-rewrite\fR will only operate on the current branch \- any other interested branches must have the generated commit merged in\. +.P +The \fB\-\-no\-rewrite\fR sub\-mode supports the following options and arguments: +.IP "\[ci]" 4 +\fB\-m \-\-message=\fR Specifies a commit message for the newly created commit\. +.IP "\[ci]" 4 +[file \|\.\|\.\|\.] The list of files to import\. These files must be tracked by patterns specified in the gitattributes\. +.IP "" 0 +.P +If \fB\-\-message\fR is given, the new commit will be created with the provided message\. If no message is given, a commit message will be generated based on the file arguments\. +.SS "EXPORT" +The \fBexport\fR mode migrates Git LFS pointer files present in the Git history out of Git LFS, converting them into their corresponding object files\. It supports all the core \fBmigrate\fR options and these additional ones: +.IP "\[ci]" 4 +\fB\-\-verbose\fR Print the commit oid and filename of migrated files to STDOUT\. +.IP "\[ci]" 4 +\fB\-\-object\-map=\fR Write to \fBpath\fR a file with the mapping of each rewritten commit\. The file format is CSV with this pattern: \fBOLD\-SHA\fR,\fBNEW\-SHA\fR +.IP "\[ci]" 4 +\fB\-\-remote=\fR Download LFS objects from the provided \fBgit\-remote\fR during the export\. If not provided, defaults to \fBorigin\fR\. +.IP "" 0 +.P +The \fBexport\fR mode requires at minimum a pattern provided with the \fB\-\-include\fR argument to specify which files to export\. Files matching the \fB\-\-include\fR patterns will be removed from Git LFS, while files matching the \fB\-\-exclude\fR patterns will retain their Git LFS status\. The export command will modify the \fB\.gitattributes\fR to set/unset any filepath patterns as given by those flags\. +.SH "INCLUDE AND EXCLUDE" +You can specify that \fBgit lfs migrate\fR should only convert files whose pathspec matches the \fB\-\-include\fR glob patterns and does not match the \fB\-\-exclude\fR glob patterns, either to reduce total migration time or to only migrate part of your repo\. Multiple patterns may be given using commas as delimiters\. +.P +Pattern matching is done so as to be functionally equivalent to the pattern matching format of \fB\.gitattributes\fR\. In addition to simple file extension matches (e\.g\., \fB*\.gif\fR) patterns may also specify directory paths, in which case the \fBpath/**\fR format may be used to match recursively\. 
+.P
+Note that this form of pattern matching for the \fB\-\-include\fR and \fB\-\-exclude\fR options used by the \fBgit lfs migrate\fR command is unique among the suite of \fBgit lfs\fR commands\. Other commands which also take these options, such as \fBgit lfs ls\-files\fR, use the gitignore(5) form of pattern matching instead\.
+.SH "INCLUDE AND EXCLUDE REFERENCES"
+You can specify that \fBgit lfs migrate\fR should only convert files added in commits reachable from certain references, namely those defined using one or more \fB\-\-include\-ref\fR options, and should ignore files in commits reachable from references defined in \fB\-\-exclude\-ref\fR options\.
+.IP "" 4
+.nf
+          D\-\-\-E\-\-\-F
+         /         \e
+    A\-\-\-B\-\-\-\-\-\-C    refs/heads/my\-feature
+         \e          \e
+          \e           refs/heads/main
+           \e
+             refs/remotes/origin/main
+.fi
+.IP "" 0
+.P
+In the above configuration, the following commits are reachable by each ref:
+.IP "" 4
+.nf
+refs/heads/main:           C, B, A
+refs/heads/my\-feature:     F, E, D, B, A
+refs/remotes/origin/main:  A
+.fi
+.IP "" 0
+.P
+The following \fBgit lfs migrate\fR options would, therefore, include commits F, E, D, C, and B, but exclude commit A:
+.IP "" 4
+.nf
+\-\-include\-ref=refs/heads/my\-feature
+\-\-include\-ref=refs/heads/main
+\-\-exclude\-ref=refs/remotes/origin/main
+.fi
+.IP "" 0
+.P
+The presence of flag \fB\-\-everything\fR indicates that all local and remote references should be migrated\.
+.SH "EXAMPLES"
+.SS "Migrate unpushed commits"
+A common use case for the migrate command is to convert large Git objects to LFS before pushing your commits\. By default, it only scans commits that don\'t exist on any remote, so long as the repository is non\-bare\.
+.P
+First, run \fBgit lfs migrate info\fR to list the file types taking up the most space in your repository:
+.IP "" 4
+.nf
+$ git lfs migrate info
+migrate: Fetching remote refs: \|\.\|\.\|\., done
+migrate: Sorting commits: \|\.\|\.\|\., done
+migrate: Examining commits: 100% (1/1), done
+*\.mp3   284 MB    1/1 files(s)  100%
+*\.pdf    42 MB    8/8 files(s)  100%
+*\.psd   9\.8 MB  15/15 files(s)  100%
+*\.ipynb 6\.9 MB    6/6 files(s)  100%
+*\.csv   5\.8 MB    2/2 files(s)  100%
+.fi
+.IP "" 0
+.P
+Now, you can run \fBgit lfs migrate import\fR to convert some file types to LFS:
+.IP "" 4
+.nf
+$ git lfs migrate import \-\-include="*\.mp3,*\.psd"
+migrate: Fetching remote refs: \|\.\|\.\|\., done
+migrate: Sorting commits: \|\.\|\.\|\., done
+migrate: Rewriting commits: 100% (1/1), done
+  main  d2b959babd099fe70da1c1512e2475e8a24de163 \-> 136e706bf1ae79643915c134e17a6c933fd53c61
+migrate: Updating refs: \|\.\|\.\|\., done
+.fi
+.IP "" 0
+.P
+If after conversion you find that some files in your working directory have been replaced with Git LFS pointers, this is normal, and the working copies of these files can be repopulated with their full expected contents by using \fBgit lfs checkout\fR\.
+.SS "Migrate local history"
+You can also migrate the entire history of your repository:
+.IP "" 4
+.nf
+# Check for large files and existing Git LFS objects in your local main branch
+$ git lfs migrate info \-\-include\-ref=main
+
+# Check for large files and existing Git LFS objects in every branch
+$ git lfs migrate info \-\-everything
+
+# Check for large files in every branch, ignoring any existing Git LFS objects,
+# and listing the top 100 or fewer results
+$ git lfs migrate info \-\-everything \-\-pointers=ignore \-\-top=100
+.fi
+.IP "" 0
+.P
+The same flags will work in \fBimport\fR mode:
+.IP "" 4
+.nf
+# Convert all zip files in your main branch
+$ git lfs migrate import \-\-include\-ref=main \-\-include="*\.zip"
+
+# Convert all zip files in every branch
+$ git lfs migrate import \-\-everything \-\-include="*\.zip"
+
+# Convert all files over 100Kb in every branch
+$ git lfs migrate import \-\-everything \-\-above=100Kb
+.fi
+.IP "" 0
+.P
+Note: This will require a force\-push to any existing Git remotes\.
Using the \fB\-\-all\fR option when force\-pushing may be convenient if many refs were updated, e\.g\., after importing to Git LFS with the \fB\-\-everything\fR option\. +.SS "Migrate without rewriting local history" +You can also migrate files without modifying the existing history of your repository\. Note that in the examples below, files in subdirectories are not included because they are not explicitly specified\. +.P +Without a specified commit message: +.P +\fB$ git lfs migrate import \-\-no\-rewrite test\.zip *\.mp3 *\.psd\fR +.P +With a specified commit message: +.P +\fB$ git lfs migrate import \-\-no\-rewrite \e \-m "Import test\.zip, \.mp3, \.psd files in root of repo" \e test\.zip *\.mp3 *\.psd\fR +.SH "SEE ALSO" +git\-lfs\-checkout(1), git\-lfs\-ls\-files(1), git\-lfs\-track(1), git\-lfs\-untrack(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 b/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 new file mode 100644 index 0000000000000000000000000000000000000000..36be15c942cbcc3297d47ffc28bfdf9410d81ae0 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 @@ -0,0 +1,34 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POINTER" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-pointer\fR \- Build, compare, and check pointers +.SH "SYNOPSIS" +\fBgit lfs pointer \-\-file=path/to/file\fR +.br +\fBgit lfs pointer \-\-file=path/to/file \-\-pointer=path/to/pointer\fR +.br +\fBgit lfs pointer \-\-file=path/to/file \-\-stdin\fR \fBgit lfs pointer \-\-check \-\-file=path/to/file\fR +.SH "Description" +Builds and optionally compares generated pointer files to ensure consistency between different Git LFS implementations\. +.SH "OPTIONS" +.TP +\fB\-\-file\fR +A local file to build the pointer from\. +.TP +\fB\-\-pointer\fR +A local file including the contents of a pointer generated from another implementation\. This is compared to the pointer generated from \fB\-\-file\fR\. +.TP +\fB\-\-stdin\fR +Reads the pointer from STDIN to compare with the pointer generated from \fB\-\-file\fR\. +.TP +\fB\-\-check\fR +Reads the pointer from STDIN (if \fB\-\-stdin\fR is given) or the filepath (if \fB\-\-file\fR) is given\. If neither or both of \fB\-\-stdin\fR and \fB\-\-file\fR are given, the invocation is invalid\. Exits 0 if the data read is a valid Git LFS pointer\. Exits 1 otherwise\. +.TP +\fB\-\-strict\fR: + +.TP +\fB\-\-no\-strict\fR +In conjunction with \fB\-\-check\fR, \fB\-\-strict\fR verifies that the pointer is canonical; that is, it would be the one created by Git LFS\. If it is not, exits 2\. The default, for backwards compatibility, is \fB\-\-no\-strict\fR, but this may change in a future version\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 b/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 new file mode 100644 index 0000000000000000000000000000000000000000..a1ec45e409080fb62e3b6243cfdeee45e317d53d --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 @@ -0,0 +1,13 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POST\-CHECKOUT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-post\-checkout\fR \- Git post\-checkout hook implementation +.SH "SYNOPSIS" +\fBgit lfs post\-checkout\fR \fIrev_before\fR \fIref_after\fR \fIis_branch_checkout\fR +.SH "DESCRIPTION" +Responds to Git post\-checkout events\. 
It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\. +.SH "SEE ALSO" +git\-lfs\-track(1) +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1 b/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1 new file mode 100644 index 0000000000000000000000000000000000000000..447b79970102bd1632da9cdd34099c168a142ce6 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1 @@ -0,0 +1,15 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POST\-COMMIT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-post\-commit\fR \- Git post\-commit hook implementation +.SH "SYNOPSIS" +\fBgit lfs post\-commit\fR +.SH "DESCRIPTION" +Responds to Git post\-commit events\. It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\. +.P +Where the \fBgit lfs post\-merge\fR command, which has a similar purpose, must examine all files in the working copy, \fBgit lfs post\-commit\fR can limit itself checking only those files which have changed in \fBHEAD\fR\. It primarily handles newly added lockable files which have not yet been made read\-only\. +.SH "SEE ALSO" +git\-lfs\-post\-merge(1), git\-lfs\-track(1) +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1 b/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1 new file mode 100644 index 0000000000000000000000000000000000000000..c5ae4bdede5a6577be0461c533f89a33bf14e50c --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1 @@ -0,0 +1,13 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POST\-MERGE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-post\-merge\fR \- Git post\-merge hook implementation +.SH "SYNOPSIS" +\fBgit lfs post\-merge\fR \fIis_squash\fR +.SH "DESCRIPTION" +Responds to Git post\-merge events\. It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\. +.SH "SEE ALSO" +git\-lfs\-track(1) +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1 b/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1 new file mode 100644 index 0000000000000000000000000000000000000000..d030265f488ed507483f9356ace54a3a269b9912 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1 @@ -0,0 +1,30 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PRE\-PUSH" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-pre\-push\fR \- Git pre\-push hook implementation +.SH "SYNOPSIS" +\fBgit lfs pre\-push\fR \fIremote\fR [remoteurl] +.SH "DESCRIPTION" +Responds to Git pre\-hook events\. It reads the range of commits from STDIN, in the following format: +.IP "" 4 +.nf + SP SP SP \en +.fi +.IP "" 0 +.P +It also takes the remote name and URL as arguments\. +.P +If any of those Git objects are associated with Git LFS objects, those objects will be pushed to the Git LFS API\. +.P +In the case of pushing a new branch, the list of Git objects will be all of the Git objects in this branch\. +.P +In the case of deleting a branch, no attempts to push Git LFS objects will be made\. +.SH "OPTIONS" +.TP +\fBGIT_LFS_SKIP_PUSH\fR +Do nothing on pre\-push\. For more, see: git\-lfs\-config(5)\. 
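+.P
+As an illustrative example (the remote and branch names here are placeholders), setting this variable for a single invocation lets \fBgit push\fR update the remote branch without transferring any Git LFS objects; the objects can then be uploaded later with git\-lfs\-push(1):
+.IP "" 4
+.nf
+$ GIT_LFS_SKIP_PUSH=1 git push origin main
+$ git lfs push origin main
+.fi
+.IP "" 0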
+.SH "SEE ALSO" +git\-lfs\-clean(1), git\-lfs\-push(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-prune.1 b/git-lfs-3.2.0/man/man1/git-lfs-prune.1 new file mode 100644 index 0000000000000000000000000000000000000000..5ac9759095f1d278bd2f3d3874fed214fb6e4c15 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-prune.1 @@ -0,0 +1,83 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PRUNE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-prune\fR \- Delete old LFS files from local storage +.SH "SYNOPSIS" +\fBgit lfs prune\fR \fIoptions\fR +.SH "DESCRIPTION" +Deletes local copies of LFS files which are old, thus freeing up disk space\. Prune operates by enumerating all the locally stored objects, and then deleting any which are not referenced by at least ONE of the following: +.IP "\[ci]" 4 +the current checkout +.IP "\[ci]" 4 +all existing stashes +.IP "\[ci]" 4 +a \'recent branch\'; see \fIRECENT FILES\fR +.IP "\[ci]" 4 +a \'recent commit\' on the current branch or recent branches; see \fIRECENT FILES\fR +.IP "\[ci]" 4 +a commit which has not been pushed; see \fIUNPUSHED LFS FILES\fR +.IP "\[ci]" 4 +any other worktree checkouts; see git\-worktree(1) +.IP "" 0 +.P +In general terms, prune will delete files you\'re not currently using and which are not \'recent\', so long as they\'ve been pushed i\.e\. the local copy is not the only one\. +.P +The reflog is not considered, only commits\. Therefore LFS objects that are only referenced by orphaned commits are always deleted\. +.P +Note: you should not run \fBgit lfs prune\fR if you have different repositories sharing the same custom storage directory; see git\-lfs\-config(5) for more details about \fBlfs\.storage\fR option\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set \fBlfs\.fetchexclude\fR to a comma\-separated list of paths\. If \fBlfs\.fetchexclude\fR is defined, then any Git LFS files whose paths match one in that list will be pruned unless they are referenced by a stash or an unpushed commit\. Paths are matched using wildcard matching as per gitignore(5)\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-dry\-run\fR \fB\-d\fR Don\'t actually delete anything, just report on what would have been done +.IP "\[ci]" 4 +\fB\-\-force\fR \fB\-f\fR Prune all objects except unpushed objects, including objects required for currently checked out refs\. Implies \fB\-\-recent\fR\. +.IP "\[ci]" 4 +\fB\-\-recent\fR Prune even objects that would normally be preserved by the configuration options specified below in \fIRECENT FILES\fR\. +.IP "\[ci]" 4 +\fB\-\-verify\-remote\fR \fB\-c\fR Contact the remote and check that copies of the files we would delete definitely exist before deleting\. See \fIVERIFY REMOTE\fR\. +.IP "\[ci]" 4 +\fB\-\-no\-verify\-remote\fR Disables remote verification if lfs\.pruneverifyremotealways was enabled in settings\. See \fIVERIFY REMOTE\fR\. +.IP "\[ci]" 4 +\fB\-\-verbose\fR \fB\-v\fR Report the full detail of what is/would be deleted\. +.IP "" 0 +.SH "RECENT FILES" +Prune won\'t delete LFS files referenced by \'recent\' commits, in case you want to use them again without having to download\. The definition of \'recent\' is derived from the one used by git\-lfs\-fetch(1) to download recent objects with the \fB\-\-recent\fR option, with an offset of a number of days (default 3) to ensure that we always keep files you download for a few days\. 
+.P +Here are the git\-config(1) settings that control this behaviour: +.IP "\[ci]" 4 +\fBlfs\.pruneoffsetdays\fR +.br +The number of extra days added to the fetch recent settings when using them to decide when to prune\. So for a reference to be considered old enough to prune, it has to be this many days older than the oldest reference that would be downloaded via \fBgit lfs fetch \-\-recent\fR\. Only used if the relevant fetch recent \'days\' setting is non\-zero\. Default 3 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR +.br +\fBlfs\.fetchrecentremoterefs\fR +.br +\fBlfs\.fetchrecentcommitsdays\fR +.br +These have the same meaning as git\-lfs\-fetch(1) with the \fB\-\-recent\fR option, they are used as a base for the offset above\. Anything which falls outside of this offsetted window is considered old enough to prune\. If a day value is zero, that condition is not used at all to retain objects and they will be pruned\. +.IP "" 0 +.SH "UNPUSHED LFS FILES" +When the only copy of an LFS file is local, and it is still reachable from any reference, that file can never be pruned, regardless of how old it is\. +.P +To determine whether an LFS file has been pushed, we check the difference between local refs and remote refs; where the local ref is ahead, any LFS files referenced in those commits is unpushed and will not be deleted\. This works because the LFS pre\-push hook always ensures that LFS files are pushed before the remote branch is updated\. +.P +See \fIDEFAULT REMOTE\fR, for which remote is considered \'pushed\' for pruning purposes\. +.SH "VERIFY REMOTE" +The \fB\-\-verify\-remote\fR option calls the remote to ensure that any LFS files to be deleted have copies on the remote before actually deleting them\. +.P +Usually the check performed by \fIUNPUSHED LFS FILES\fR is enough to determine that files have been pushed, but if you want to be extra sure at the expense of extra overhead you can make prune actually call the remote API and verify the presence of the files you\'re about to delete locally\. See \fIDEFAULT REMOTE\fR for which remote is checked\. +.P +You can make this behaviour the default by setting \fBlfs\.pruneverifyremotealways\fR to true\. +.P +In addition to the overhead of calling the remote, using this option also requires prune to distinguish between totally unreachable files (e\.g\. those that were added to the index but never committed, or referenced only by orphaned commits), and files which are still referenced, but by commits which are prunable\. This makes the prune process take longer\. +.SH "DEFAULT REMOTE" +When identifying \fIUNPUSHED LFS FILES\fR and performing \fIVERIFY REMOTE\fR, a single remote, \'origin\', is normally used as the reference\. This one remote is considered canonical; even if you use multiple remotes, you probably want to retain your local copies until they\'ve made it to that remote\. \'origin\' is used by default because that will usually be a main central repo, or your fork of it \- in both cases that\'s a valid remote backup of your work\. If origin doesn\'t exist then by default nothing will be pruned because everything is treated as \'unpushed\'\. +.P +You can alter the remote via git config: \fBlfs\.pruneremotetocheck\fR\. Set this to a different remote name to check that one instead of \'origin\'\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/git-lfs-3.2.0/man/man1/git-lfs-pull.1 b/git-lfs-3.2.0/man/man1/git-lfs-pull.1 new file mode 100644 index 0000000000000000000000000000000000000000..6a278668bd099ee70fd1a4868969a7ac80b547d6 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-pull.1 @@ -0,0 +1,32 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PULL" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-pull\fR \- Download all Git LFS files for current ref & checkout +.SH "SYNOPSIS" +\fBgit lfs pull\fR [options] [\fIremote\fR] +.SH "DESCRIPTION" +Download Git LFS objects for the currently checked out ref, and update the working copy with the downloaded content if required\. +.P +This is equivalent to running the following 2 commands: +.P +git lfs fetch [options] [\fIremote\fR] git lfs checkout +.SH "OPTIONS" +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Specify lfs\.fetchinclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Specify lfs\.fetchexclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. +.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. Setting either option to an empty string clears the value\. +.SH "DEFAULT REMOTE" +Without arguments, pull downloads from the default remote\. The default remote is the same as for \fBgit pull\fR, i\.e\. based on the remote branch you\'re tracking first, or origin otherwise\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-checkout(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-push.1 b/git-lfs-3.2.0/man/man1/git-lfs-push.1 new file mode 100644 index 0000000000000000000000000000000000000000..4e1be28a7c81fc849f1dc95a4f18e186c38c8c30 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-push.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PUSH" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-push\fR \- Push queued large files to the Git LFS endpoint +.SH "SYNOPSIS" +\fBgit lfs push\fR \fIoptions\fR \fIremote\fR [\fIref\fR\|\.\|\.\|\.] +.br +\fBgit lfs push\fR \fIremote\fR [\fIref\fR\|\.\|\.\|\.] +.br +\fBgit lfs push\fR \-\-object\-id \fIremote\fR [\fIoid\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Upload Git LFS files to the configured endpoint for the current Git remote\. By default, it filters out objects that are already referenced by the local clone of the remote\. +.SH "OPTIONS" +.TP +\fB\-\-dry\-run\fR +Print the files that would be pushed, without actually pushing them\. +.TP +\fB\-\-all\fR +This pushes all objects to the remote that are referenced by any commit reachable from the refs provided as arguments\. 
If no refs are provided, then all local refs are pushed\. Note that this behavior differs from that of git\-lfs\-fetch(1) when its \fB\-\-all\fR option is used; in that case, all refs are fetched, including refs other than those under \fBrefs/heads\fR and \fBrefs/tags\fR\. If you are migrating a repository with these commands, make sure to run \fBgit lfs push\fR for any additional remote refs that contain Git LFS objects not reachable from your local refs\. +.TP +\fB\-\-object\-id\fR +This pushes only the object OIDs listed at the end of the command, separated by spaces\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-pre\-push(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 b/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 new file mode 100644 index 0000000000000000000000000000000000000000..b59233e308de3485dabe1bb08d61befdb5f7fbbb --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-SMUDGE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-smudge\fR \- Git smudge filter that converts pointer in blobs to the actual content +.SH "SYNOPSIS" +\fBgit lfs smudge\fR [\fIpath\fR] \fBgit lfs smudge\fR \-\-skip [\fIpath\fR] +.SH "DESCRIPTION" +Read a Git LFS pointer file from standard input and write the contents of the corresponding large file to standard output\. If needed, download the file\'s contents from the Git LFS endpoint\. The \fIpath\fR argument, if provided, is only used for a progress bar\. +.P +Smudge is typically run by Git\'s smudge filter, configured by the repository\'s Git attributes\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. Git LFS pointer files that are not replaced with the contents of their corresponding object files are simply copied to standard output without change\. +.SH "OPTIONS" +Without any options, \fBgit lfs smudge\fR outputs the raw Git LFS content to standard output\. +.TP +\fB\-\-skip\fR +Skip automatic downloading of objects on clone or pull\. +.TP +\fBGIT_LFS_SKIP_SMUDGE\fR +Disables the smudging process\. For more, see: git\-lfs\-config(5)\. +.SH "KNOWN BUGS" +On Windows, Git before 2\.34\.0 does not handle files in the working tree larger than 4 gigabytes\. Newer versions of Git, as well as Unix versions, are unaffected\. +.SH "SEE ALSO" +git\-lfs\-install(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 b/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 new file mode 100644 index 0000000000000000000000000000000000000000..938f676785c8da51adb52fe29a0e515a13992fcb --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 @@ -0,0 +1,15 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-STANDALONE\-FILE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-standalone\-file\fR \- Standalone transfer adapter for file URLs +.SH "SYNOPSIS" +\fBgit lfs standalone\-file\fR +.SH "DESCRIPTION" +Provides a standalone transfer adapter for file URLs (local paths)\. +.P +By default, Git LFS requires the support of an HTTP server to implement the Git LFS protocol\. However, this tool allows the use of URLs starting with \fBfile:///\fR (that is, those representing local paths) in addition\. Configuration is not necessary; Git LFS handles this internally\. +.P +When invoked, this tool speaks JSON on input and output as a standalone transfer adapter\. It is not intended for use by end users\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-status.1 b/git-lfs-3.2.0/man/man1/git-lfs-status.1 new file mode 100644 index 0000000000000000000000000000000000000000..efd627b3add046209edc4603a63d1bfcc653d50e --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-status.1 @@ -0,0 +1,29 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-STATUS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-status\fR \- Show the status of Git LFS files in the working tree +.SH "SYNOPSIS" +\fBgit lfs status\fR [\fIoptions\fR] +.SH "DESCRIPTION" +Display paths of Git LFS objects that +.IP "\[ci]" 4 +have not been pushed to the Git LFS server\. These are large files that would be uploaded by \fBgit push\fR\. +.IP "\[ci]" 4 +have differences between the index file and the current HEAD commit\. These are large files that would be committed by \fBgit commit\fR\. +.IP "\[ci]" 4 +have differences between the working tree and the index file\. These are files that could be staged using \fBgit add\fR\. +.IP "" 0 +.P +This command must be run in a non\-bare repository\. +.SH "OPTIONS" +.TP +\fB\-\-porcelain\fR +Give the output in an easy\-to\-parse format for scripts\. +.TP +\fB\-\-json\fR +Give the output in a stable json format for scripts\. +.SH "SEE ALSO" +git\-lfs\-ls\-files(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-track.1 b/git-lfs-3.2.0/man/man1/git-lfs-track.1 new file mode 100644 index 0000000000000000000000000000000000000000..060599334a22337eb5d9c2e757a95194001f6b1b --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-track.1 @@ -0,0 +1,48 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-TRACK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-track\fR \- View or add Git LFS paths to Git attributes +.SH "SYNOPSIS" +\fBgit lfs track\fR [options] [\fIpattern\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Start tracking the given patterns(s) through Git LFS\. The \fIpattern\fR argument is written to \.gitattributes\. If no paths are provided, simply list the currently\-tracked paths\. +.P +The gitattributes documentation \fIhttps://git\-scm\.com/docs/gitattributes\fR states that patterns use the gitignore pattern rules \fIhttps://git\-scm\.com/docs/gitignore\fR to match paths\. 
This means that patterns which contain asterisk (\fB*\fR), question mark (\fB?\fR), and the bracket characters (\fB[\fR and \fB]\fR) are treated specially; to disable this behavior and treat them literally instead, use \fB\-\-filename\fR or escape the character with a backslash\.
+.SH "OPTIONS"
+.IP "\[ci]" 4
+\fB\-\-verbose\fR \fB\-v\fR: If enabled, have \fBgit lfs track\fR log files which it will touch\. Disabled by default\.
+.IP "\[ci]" 4
+\fB\-\-dry\-run\fR \fB\-d\fR: If enabled, have \fBgit lfs track\fR log all actions it would normally take (adding entries to \.gitattributes, touching files on disk, etc) without performing any mutative operations to the disk\.
+.IP
+\fBgit lfs track \-\-dry\-run [files]\fR also implicitly mocks the behavior of passing the \fB\-\-verbose\fR, and will log in greater detail what it is doing\.
+.IP
+Disabled by default\.
+.IP "\[ci]" 4
+\fB\-\-filename\fR Treat the arguments as literal filenames, not as patterns\. Any special glob characters in the filename will be escaped when writing the \fB\.gitattributes\fR file\.
+.IP "\[ci]" 4
+\fB\-\-lockable\fR \fB\-l\fR Make the paths \'lockable\', meaning they should be locked to edit them, and will be made read\-only in the working copy when not locked\.
+.IP "\[ci]" 4
+\fB\-\-not\-lockable\fR Remove the lockable flag from the paths so they are no longer read\-only unless locked\.
+.IP "\[ci]" 4
+\fB\-\-no\-excluded\fR Do not list patterns that are excluded in the output; only list patterns that are tracked\.
+.IP "\[ci]" 4
+\fB\-\-no\-modify\-attrs\fR Makes matched entries stat\-dirty so that Git can re\-index files you wish to convert to LFS\. Does not modify any \fB\.gitattributes\fR file(s)\.
+.IP "" 0
+.SH "EXAMPLES"
+.TP
+List the patterns that Git LFS is currently tracking:
+\fBgit lfs track\fR
+.TP
+Configure Git LFS to track GIF files:
+\fBgit lfs track "*\.gif"\fR
+.TP
+Configure Git LFS to track PSD files and make them read\-only unless locked:
+\fBgit lfs track \-\-lockable "*\.psd"\fR
+.TP
+Configure Git LFS to track the file named \fBproject [1]\.psd\fR:
+\fBgit lfs track \-\-filename "project [1]\.psd"\fR
+.SH "SEE ALSO"
+git\-lfs\-untrack(1), git\-lfs\-install(1), gitattributes(5), gitignore(5)\.
+.P
+Part of the git\-lfs(1) suite\.
diff --git a/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 b/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 new file mode 100644 index 0000000000000000000000000000000000000000..9589a7239d364ac262b0dcd4222c5f930a73f3d1 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 @@ -0,0 +1,31 @@
+.\" generated with Ronn-NG/v0.9.1
+.\" http://github.com/apjanke/ronn-ng/tree/0.9.1
+.TH "GIT\-LFS\-UNINSTALL" "1" "May 2022" ""
+.SH "NAME"
+\fBgit\-lfs\-uninstall\fR \- Remove Git LFS configuration
+.SH "SYNOPSIS"
+\fBgit lfs uninstall\fR
+.SH "DESCRIPTION"
+Perform the following actions to remove the Git LFS configuration:
+.IP "\[ci]" 4
+Remove the "lfs" clean and smudge filters from the global Git config\.
+.IP "\[ci]" 4
+Uninstall the Git LFS pre\-push hook if run from inside a Git repository\.
+.IP "" 0
+.SH "OPTIONS"
+.TP
+\-\-local
+Removes the "lfs" smudge and clean filters from the local repository\'s git config, instead of the global git config (~/\.gitconfig)\.
+.TP
+\-\-worktree
+Removes the "lfs" smudge and clean filters from the current working tree\'s git config, instead of the global git config (~/\.gitconfig) or local repository\'s git config ($GIT_DIR/config)\. If multiple working trees are in use, the Git config extension \fBworktreeConfig\fR must be enabled to use this option\. If only one working tree is in use, \fB\-\-worktree\fR has the same effect as \fB\-\-local\fR\.
This option is only available if the installed Git version is at least 2\.20\.0 and therefore supports the "worktreeConfig" extension\. +.TP +\-\-system +Removes the "lfs" smudge and clean filters from the system git config, instead of the global git config (~/\.gitconfig)\. +.TP +\-\-skip\-repo +Skips cleanup of the local repo; use if you want to uninstall the global lfs filters but not make changes to the current repo\. +.SH "SEE ALSO" +git\-lfs\-install(1), git\-worktree(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 b/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 new file mode 100644 index 0000000000000000000000000000000000000000..f149a2e3a981f608d75418784584e385e6f1b5a8 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 @@ -0,0 +1,26 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UNLOCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-unlock\fR \- Remove "locked" setting for a file on the Git LFS server +.SH "SYNOPSIS" +\fBgit lfs unlock\fR \fIOPTIONS\fR \fIpath\fR +.SH "DESCRIPTION" +Removes the given file path as "locked" on the Git LFS server\. Files must exist and have a clean git status before they can be unlocked\. The \fB\-\-force\fR flag will skip these checks\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. +.TP +\fB\-f\fR \fB\-\-force\fR +Tells the server to remove the lock, even if it\'s owned by another user\. +.TP +\fB\-i \fR \fB\-\-id=\fR +Specifies a lock by its ID instead of path\. +.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-lock(1), git\-lfs\-locks(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 b/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 new file mode 100644 index 0000000000000000000000000000000000000000..33ca6a3c4617907f0caf32cbdececf525fb1376b --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 @@ -0,0 +1,17 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UNTRACK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-untrack\fR \- Remove Git LFS paths from Git Attributes +.SH "SYNOPSIS" +\fBgit lfs untrack\fR \fIpath\fR\|\.\|\.\|\. +.SH "DESCRIPTION" +Stop tracking the given path(s) through Git LFS\. The \fIpath\fR argument can be a glob pattern or a file path\. +.SH "EXAMPLES" +.TP +Configure Git LFS to stop tracking GIF files: + +.SH "SEE ALSO" +git\-lfs\-track(1), git\-lfs\-install(1), gitattributes(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs-update.1 b/git-lfs-3.2.0/man/man1/git-lfs-update.1 new file mode 100644 index 0000000000000000000000000000000000000000..cdb653987af842ed5241b0bc56e185c2406a91c6 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs-update.1 @@ -0,0 +1,20 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UPDATE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-update\fR \- Update Git hooks +.SH "SYNOPSIS" +.TS +allbox; +\fBgit lfs update\fR [\-\-manual \-\-force] +.TE +.SH "DESCRIPTION" +Updates the Git hooks used by Git LFS\. Silently upgrades known hook contents\. 
If you have your own custom hooks you may need to use one of the extended options below\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-manual\fR \fB\-m\fR Print instructions for manually updating your hooks to include git\-lfs functionality\. Use this option if \fBgit lfs update\fR fails because of existing hooks and you want to retain their functionality\. +.IP "\[ci]" 4 +\fB\-\-force\fR \fB\-f\fR Forcibly overwrite any existing hooks with git\-lfs hooks\. Use this option if \fBgit lfs update\fR fails because of existing hooks but you don\'t care about their current contents\. +.IP "" 0 +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-3.2.0/man/man1/git-lfs.1 b/git-lfs-3.2.0/man/man1/git-lfs.1 new file mode 100644 index 0000000000000000000000000000000000000000..260b66788b06c32b839f810599cf421ac4396ca0 --- /dev/null +++ b/git-lfs-3.2.0/man/man1/git-lfs.1 @@ -0,0 +1,149 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\fR \- Work with large files in Git repositories +.SH "SYNOPSIS" +\fBgit lfs\fR \fIcommand\fR [\fIargs\fR] +.SH "DESCRIPTION" +Git LFS is a system for managing and versioning large files in association with a Git repository\. Instead of storing the large files within the Git repository as blobs, Git LFS stores special "pointer files" in the repository, while storing the actual file contents on a Git LFS server\. The contents of the large file are downloaded automatically when needed, for example when a Git branch containing the large file is checked out\. +.P +Git LFS works by using a "smudge" filter to look up the large file contents based on the pointer file, and a "clean" filter to create a new version of the pointer file when the large file\'s contents change\. It also uses a \fBpre\-push\fR hook to upload the large file contents to the Git LFS server whenever a commit containing a new large file version is about to be pushed to the corresponding Git server\. +.SH "COMMANDS" +Like Git, Git LFS commands are separated into high level ("porcelain") commands and low level ("plumbing") commands\. +.SS "High level porcelain commands" +.TP +git\-lfs\-checkout(1) +Populate working copy with real content from Git LFS files\. +.TP +git\-lfs\-dedup(1) +De\-duplicate Git LFS files\. +.TP +git\-lfs\-env(1) +Display the Git LFS environment\. +.TP +git\-lfs\-ext(1) +Display Git LFS extension details\. +.TP +git\-lfs\-fetch(1) +Download Git LFS files from a remote\. +.TP +git\-lfs\-fsck(1) +Check Git LFS files for consistency\. +.TP +git\-lfs\-install(1) +Install Git LFS configuration\. +.TP +git\-lfs\-lock(1) +Set a file as "locked" on the Git LFS server\. +.TP +git\-lfs\-locks(1) +List currently "locked" files from the Git LFS server\. +.TP +git\-lfs\-logs(1) +Show errors from the Git LFS command\. +.TP +git\-lfs\-ls\-files(1) +Show information about Git LFS files in the index and working tree\. +.TP +git\-lfs\-migrate(1) +Migrate history to or from Git LFS +.TP +git\-lfs\-prune(1) +Delete old Git LFS files from local storage +.TP +git\-lfs\-pull(1) +Fetch Git LFS changes from the remote & checkout any required working tree files\. +.TP +git\-lfs\-push(1) +Push queued large files to the Git LFS endpoint\. +.TP +git\-lfs\-status(1) +Show the status of Git LFS files in the working tree\. +.TP +git\-lfs\-track(1) +View or add Git LFS paths to Git attributes\. +.TP +git\-lfs\-uninstall(1) +Uninstall Git LFS by removing hooks and smudge/clean filter configuration\. 
+.TP +git\-lfs\-unlock(1) +Remove "locked" setting for a file on the Git LFS server\. +.TP +git\-lfs\-untrack(1) +Remove Git LFS paths from Git Attributes\. +.TP +git\-lfs\-update(1) +Update Git hooks for the current Git repository\. +.TP +git\-lfs\-version(1) +Report the version number\. +.SS "Low level plumbing commands" +.TP +git\-lfs\-clean(1) +Git clean filter that converts large files to pointers\. +.TP +git\-lfs\-filter\-process(1) +Git process filter that converts between large files and pointers\. +.TP +git\-lfs\-merge\-driver(1) +Merge text\-based LFS files +.TP +git\-lfs\-pointer(1) +Build and compare pointers\. +.TP +git\-lfs\-post\-checkout(1) +Git post\-checkout hook implementation\. +.TP +git\-lfs\-post\-commit(1) +Git post\-commit hook implementation\. +.TP +git\-lfs\-post\-merge(1) +Git post\-merge hook implementation\. +.TP +git\-lfs\-pre\-push(1) +Git pre\-push hook implementation\. +.TP +git\-lfs\-smudge(1) +Git smudge filter that converts pointer in blobs to the actual content\. +.TP +git\-lfs\-standalone\-file(1) +Git LFS standalone transfer adapter for file URLs (local paths)\. +.SH "EXAMPLES" +To get started with Git LFS, the following commands can be used\. +.IP "1." 4 +Setup Git LFS on your system\. You only have to do this once per repository per machine: +.IP "" 4 +.nf +git lfs install +.fi +.IP "" 0 + +.IP "2." 4 +Choose the type of files you want to track, for examples all \fBISO\fR images, with git\-lfs\-track(1): +.IP "" 4 +.nf +git lfs track "*\.iso" +.fi +.IP "" 0 + +.IP "3." 4 +The above stores this information in gitattributes(5) files, so that file needs to be added to the repository: +.IP "" 4 +.nf +git add \.gitattributes +.fi +.IP "" 0 + +.IP "4." 4 +Commit, push and work with the files normally: +.IP "" 4 +.nf +git add file\.iso +git commit \-m "Add disk image" +git push +.fi +.IP "" 0 + +.IP "" 0 + diff --git a/git-lfs-3.2.0/man/man5/git-lfs-config.5 b/git-lfs-3.2.0/man/man5/git-lfs-config.5 new file mode 100644 index 0000000000000000000000000000000000000000..308f1622222dd48af76ff8664a0df36993b31b43 --- /dev/null +++ b/git-lfs-3.2.0/man/man5/git-lfs-config.5 @@ -0,0 +1,291 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CONFIG" "5" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-config\fR \- Configuration options for git\-lfs +.SH "CONFIGURATION FILES" +git\-lfs reads its configuration from any file supported by \fBgit config \-l\fR, including all per\-repository, per\-user, and per\-system Git configuration files\. +.P +Additionally, a small number of settings can be specified in a file called \fB\.lfsconfig\fR at the root of the repository; see the "LFSCONFIG" section for more details\. This configuration file is useful for setting options such as the LFS URL or access type for all users of a repository, especially when these differ from the default\. The \fB\.lfsconfig\fR file uses the same format as \fB\.gitconfig\fR\. +.P +If the \fB\.lfsconfig\fR file is missing, the index is checked for a version of the file, and that is used instead\. If both are missing, \fBHEAD\fR is checked for the file\. If the repository is bare, only \fBHEAD\fR is checked\. This order may change for checkouts in the future to better match Git\'s behavior\. +.P +Settings from Git configuration files override the \fB\.lfsconfig\fR file\. This allows you to override settings like \fBlfs\.url\fR in your local environment without having to modify the \fB\.lfsconfig\fR file\. 
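+.P
+For example (the URLs below are placeholders), a repository might ship an \fB\.lfsconfig\fR such as:
+.IP "" 4
+.nf
+[lfs]
+    url = https://lfs\.example\.com/myrepo
+.fi
+.IP "" 0
+.P
+and an individual user could still point their own clone at a different endpoint, without editing that shared file, by setting the same key in their repository\'s Git config:
+.IP "" 4
+.nf
+$ git config lfs\.url https://lfs\.example\.org/mirror/myrepo
+.fi
+.IP "" 0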
+.P +Most options regarding git\-lfs are contained in the \fB[lfs]\fR section, meaning they are all named \fBlfs\.foo\fR or similar, although occasionally an lfs option can be scoped inside the configuration for a remote\. +.SH "LIST OF OPTIONS" +.SS "General settings" +.IP "\[ci]" 4 +\fBlfs\.url\fR / \fBremote\.\.lfsurl\fR +.IP +The url used to call the Git LFS remote API\. Default blank (derive from clone URL)\. +.IP "\[ci]" 4 +\fBlfs\.pushurl\fR / \fBremote\.\.lfspushurl\fR +.IP +The url used to call the Git LFS remote API when pushing\. Default blank (derive from either LFS non\-push urls or clone url)\. +.IP "\[ci]" 4 +\fBremote\.lfsdefault\fR +.IP +The remote used to find the Git LFS remote API\. \fBlfs\.url\fR and \fBbranch\.*\.remote\fR for the current branch override this setting\. If this setting is not specified and there is exactly one remote, that remote is picked; otherwise, the default is \fBorigin\fR\. +.IP "\[ci]" 4 +\fBremote\.lfspushdefault\fR +.IP +The remote used to find the Git LFS remote API when pushing\. \fBlfs\.url\fR and \fBbranch\.*\.pushremote\fR for the current branch override this setting\. If this setting is not set, \fBremote\.pushdefault\fR is used, or if that is not set, the order of selection is used as specified in the \fBremote\.lfsdefault\fR above\. +.IP "\[ci]" 4 +\fBlfs\.dialtimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait to initiate a connection\. This does not include the time to send a request and wait for a response\. Default: 30 seconds +.IP "\[ci]" 4 +\fBlfs\.tlstimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait for a TLS handshake\. Default: 30 seconds\. +.IP "\[ci]" 4 +\fBlfs\.activitytimeout\fR / \fBlfs\.https://\.activitytimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait for the next tcp read or write\. If < 1, no activity timeout is used at all\. Default: 30 seconds +.IP "\[ci]" 4 +\fBlfs\.keepalive\fR +.IP +Sets the maximum time, in seconds, for the HTTP client to maintain keepalive connections\. Default: 30 minutes\. +.IP "\[ci]" 4 +\fBlfs\.ssh\.automultiplex\fR +.IP +When using the pure SSH\-based protocol, whether to multiplex requests over a single connection when possible\. This option requires the use of OpenSSH or a compatible SSH client\. Default: true\. +.IP "\[ci]" 4 +\fBlfs\.ssh\.retries\fR +.IP +Specifies the number of times Git LFS will attempt to obtain authorization via SSH before aborting\. Default: 5\. +.IP "\[ci]" 4 +\fBcore\.askpass\fR, GIT_ASKPASS +.IP +Given as a program and its arguments, this is invoked when authentication is needed against the LFS API\. The contents of stdout are interpreted as the password\. +.IP "\[ci]" 4 +\fBlfs\.cachecredentials\fR +.IP +Enables in\-memory SSH and Git Credential caching for a single \'git lfs\' command\. Default: enabled\. +.IP "\[ci]" 4 +\fBlfs\.storage\fR +.IP +Allow override LFS storage directory\. Non\-absolute path is relativized to inside of Git repository directory (usually \fB\.git\fR)\. +.IP +Note: you should not run \fBgit lfs prune\fR if you have different repositories sharing the same storage directory\. +.IP +Default: \fBlfs\fR in Git repository directory (usually \fB\.git/lfs\fR)\. +.IP "\[ci]" 4 +\fBlfs\.largefilewarning\fR +.IP +Warn when a file is 4 GiB or larger\. Such files will be corrupted when using Windows (unless smudging is disabled) with a Git for Windows version less than 2\.34\.0 due to a limitation in Git\. 
Default: true if the version is less than 2\.34\.0, false otherwise\. +.IP "" 0 +.SS "Upload and download transfer settings" +These settings control how the upload and download of LFS content occurs\. +.IP "\[ci]" 4 +\fBlfs\.concurrenttransfers\fR +.IP +The number of concurrent uploads/downloads\. Default 8\. +.IP "\[ci]" 4 +\fBlfs\.basictransfersonly\fR +.IP +If set to true, only basic HTTP upload/download transfers will be used, ignoring any more advanced transfers that the client/server may support\. This is primarily to work around bugs or incompatibilities\. +.IP +The git\-lfs client supports basic HTTP downloads, resumable HTTP downloads (using \fBRange\fR headers), and resumable uploads via tus\.io protocol\. Custom transfer methods can be added via \fBlfs\.customtransfer\fR (see next section)\. However setting this value to true limits the client to simple HTTP\. +.IP "\[ci]" 4 +\fBlfs\.tustransfers\fR +.IP +If set to true, this enables resumable uploads of LFS objects through the tus\.io API\. Once this feature is finalized, this setting will be removed, and tus\.io uploads will be available for all clients\. +.IP "\[ci]" 4 +\fBlfs\.standalonetransferagent\fR +.IP +Allows the specified custom transfer agent to be used directly for transferring files, without asking the server how the transfers should be made\. The custom transfer agent has to be defined in a \fBlfs\.customtransfer\.\fR settings group\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.path\fR +.IP +\fBlfs\.customtransfer\.\fR is a settings group which defines a custom transfer hook which allows you to upload/download via an intermediate process, using any mechanism you like (rather than just HTTP)\. \fBpath\fR should point to the process you wish to invoke\. The protocol between the git\-lfs client and the custom transfer process is documented at https://github\.com/git\-lfs/git\-lfs/blob/main/docs/custom\-transfers\.md +.IP +\fIname\fR must be a unique identifier that the LFS server understands\. When calling the LFS API the client will include a list of supported transfer types\. If the server also supports this named transfer type, it will select it and actions returned from the API will be in relation to that transfer type (may not be traditional URLs for example)\. Only if the server accepts \fIname\fR as a transfer it supports will this custom transfer process be invoked\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.args\fR +.IP +If the custom transfer process requires any arguments, these can be provided here\. This string will be expanded by the shell\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.concurrent\fR +.IP +If true (the default), git\-lfs will invoke the custom transfer process multiple times in parallel, according to \fBlfs\.concurrenttransfers\fR, splitting the transfer workload between the processes\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.direction\fR +.IP +Specifies which direction the custom transfer process supports, either "download", "upload", or "both"\. The default if unspecified is "both"\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxretries\fR +.IP +Specifies how many retries LFS will attempt per OID before marking the transfer as failed\. Must be an integer which is at least one\. If the value is not an integer, is less than one, or is not given, a value of eight will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxretrydelay\fR +.IP +Specifies the maximum time in seconds LFS will wait between each retry attempt\. 
LFS uses exponential backoff for retries, doubling the time between each retry until reaching this limit\. If a server requests a delay using the \fBRetry\-After\fR header, the header value overrides the exponential delay for that attempt and is not limited by this option\. +.IP +Must be an integer which is not negative\. Use zero to disable delays between retries unless requested by a server\. If the value is not an integer, is negative, or is not given, a value of ten will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxverifies\fR +.IP +Specifies how many verification requests LFS will attempt per OID before marking the transfer as failed, if the object has a verification action associated with it\. Must be an integer which is at least one\. If the value is not an integer, is less than one, or is not given, a default value of three will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.enablehrefrewrite\fR +.IP +If set to true, this enables rewriting href of LFS objects using \fBurl\.*\.insteadof/pushinsteadof\fR config\. \fBpushinsteadof\fR is used only for uploading, and \fBinsteadof\fR is used for downloading and for uploading when \fBpushinsteadof\fR is not set\. +.IP "" 0 +.SS "Push settings" +.IP "\[ci]" 4 +\fBlfs\.allowincompletepush\fR +.IP +When pushing, allow objects to be missing from the local cache without halting a Git push\. Default: false\. +.IP "" 0 +.SS "Fetch settings" +.IP "\[ci]" 4 +\fBlfs\.fetchinclude\fR +.IP +When fetching, only download objects which match any entry on this comma\-separated list of paths/filenames\. Wildcard matching is as per gitignore(5)\. See git\-lfs\-fetch(1) for examples\. +.IP "\[ci]" 4 +\fBlfs\.fetchexclude\fR +.IP +When fetching, do not download objects which match any item on this comma\-separated list of paths/filenames\. Wildcard matching is as per gitignore(5)\. See git\-lfs\-fetch(1) for examples\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR +.IP +If non\-zero, fetches refs which have commits within N days of the current date\. Only local refs are included unless lfs\.fetchrecentremoterefs is true\. Also used as a basis for pruning old files\. The default is 7 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentremoterefs\fR +.IP +If true, fetches remote refs (for the remote you\'re fetching) as well as local refs in the recent window\. This is useful to fetch objects for remote branches you might want to check out later\. The default is true; if you set this to false, fetching for those branches will only occur when you either check them out (losing the advantage of fetch \-\-recent), or create a tracking local branch separately then fetch again\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentcommitsdays\fR +.IP +In addition to fetching at refs, also fetches previous changes made within N days of the latest commit on the ref\. This is useful if you\'re often reviewing recent changes\. Also used as a basis for pruning old files\. The default is 0 (no previous changes)\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentalways\fR +.IP +Always operate as if \-\-recent was included in a \fBgit lfs fetch\fR call\. Default false\. +.IP "" 0 +.SS "Prune settings" +.IP "\[ci]" 4 +\fBlfs\.pruneoffsetdays\fR +.IP +The number of days added to the \fBlfs\.fetchrecent*\fR settings to determine what can be pruned\. Default is 3 days, i\.e\. that anything fetched at the very oldest edge of the \'recent window\' is eligible for pruning 3 days later\. 
+.IP "\[ci]" 4 +\fBlfs\.pruneremotetocheck\fR +.IP +Set the remote that LFS files must have been pushed to in order for them to be considered eligible for local pruning\. Also the remote which is called if \-\-verify\-remote is enabled\. +.IP "\[ci]" 4 +\fBlfs\.pruneverifyremotealways\fR +.IP +Always run \fBgit lfs prune\fR as if \fB\-\-verify\-remote\fR was provided\. +.IP "" 0 +.SS "Extensions" +.IP "\[ci]" 4 +\fBlfs\.extension\.\.\fR +.IP +Git LFS extensions enable the manipulation of files streams during smudge and clean\. \fBname\fR groups the settings for a single extension, and the settings are: * \fBclean\fR The command which runs when files are added to the index * \fBsmudge\fR The command which runs when files are written to the working copy * \fBpriority\fR The order of this extension compared to others +.IP "" 0 +.SS "Other settings" +.IP "\[ci]" 4 +\fBlfs\.\.access\fR +.IP +Note: this setting is normally set by LFS itself on receiving a 401 response (authentication required), you don\'t normally need to set it manually\. +.IP +If set to "basic" then credentials will be requested before making batch requests to this url, otherwise a public request will initially be attempted\. +.IP "\[ci]" 4 +\fBlfs\.\.locksverify\fR +.IP +Determines whether locks are checked before Git pushes\. This prevents you from pushing changes to files that other users have locked\. The Git LFS pre\-push hook varies its behavior based on the value of this config key\. +.IP "\[ci]" 4 +\fBnull\fR \- In the absence of a value, Git LFS will attempt the call, and warn if it returns an error\. If the response is valid, Git LFS will set the value to \fBtrue\fR, and will halt the push if the user attempts to update a file locked by another user\. If the server returns a \fB501 Not Implemented\fR response, Git LFS will set the value to \fBfalse\.\fR +.IP "\[ci]" 4 +\fBtrue\fR \- Git LFS will attempt to verify locks, halting the Git push if there are any server issues, or if the user attempts to update a file locked by another user\. +.IP "\[ci]" 4 +\fBfalse\fR \- Git LFS will completely skip the lock check in the pre\-push hook\. You should set this if you\'re not using File Locking, or your Git server verifies locked files on pushes automatically\. +.IP "" 0 +.IP +Supports URL config lookup as described in: https://git\-scm\.com/docs/git\-config#git\-config\-httplturlgt\. To set this value per\-host: \fBgit config \-\-global lfs\.https://github\.com/\.locksverify [true|false]\fR\. +.IP "\[ci]" 4 +\fBlfs\.\.contenttype\fR +.IP +Determines whether Git LFS should attempt to detect an appropriate HTTP \fBContent\-Type\fR header when uploading using the \'basic\' upload adapter\. If set to false, the default header of \fBContent\-Type: application/octet\-stream\fR is chosen instead\. Default: \'true\'\. +.IP "\[ci]" 4 +\fBlfs\.skipdownloaderrors\fR +.IP +Causes Git LFS not to abort the smudge filter when a download error is encountered, which allows actions such as checkout to work when you are unable to download the LFS content\. LFS files which could not download will contain pointer content instead\. +.IP +Note that this will result in git commands which call the smudge filter to report success even in cases when LFS downloads fail, which may affect scripts\. +.IP +You can also set the environment variable GIT_LFS_SKIP_DOWNLOAD_ERRORS=1 to get the same effect\. 
+.IP "\[ci]" 4 +\fBGIT_LFS_PROGRESS\fR +.IP +This environment variable causes Git LFS to emit progress updates to an absolute file\-path on disk when cleaning, smudging, or fetching\. +.IP +Progress is reported periodically in the form of a new line being appended to the end of the file\. Each new line will take the following format: +.IP +\fB / / \fR +.IP +Each field is described below: * \fBdirection\fR: The direction of transfer, either "checkout", "download", or "upload"\. * \fBcurrent\fR The index of the currently transferring file\. * \fBtotal files\fR The estimated count of all files to be transferred\. * \fBdownloaded\fR The number of bytes already downloaded\. * \fBtotal\fR The entire size of the file, in bytes\. * \fBname\fR The name of the file\. +.IP "\[ci]" 4 +\fBGIT_LFS_FORCE_PROGRESS\fR \fBlfs\.forceprogress\fR +.IP +Controls whether Git LFS will suppress progress status when the standard output stream is not attached to a terminal\. The default is \fBfalse\fR which makes Git LFS detect whether stdout is a terminal and suppress progress when it\'s not; you can disable this behaviour and force progress status even when standard output stream is not a terminal by setting either variable to 1, \'yes\' or \'true\'\. +.IP "\[ci]" 4 +\fBGIT_LFS_SKIP_SMUDGE\fR +.IP +Sets whether or not Git LFS will skip attempting to convert pointers of files tracked into their corresponding objects when checked out into a working copy\. If \'true\', \'1\', \'on\', or similar, Git LFS will skip the smudge process in both \fBgit lfs smudge\fR and \fBgit lfs filter\-process\fR\. If unset, or set to \'false\', \'0\', \'off\', or similar, Git LFS will smudge files as normal\. +.IP "\[ci]" 4 +\fBGIT_LFS_SKIP_PUSH\fR +.IP +Sets whether or not Git LFS will attempt to upload new Git LFS object in a pre\-push hook\. If \'true\', \'1\', \'on\', or similar, Git LFS will skip the pre\-push hook, so no new Git LFS objects will be uploaded\. If unset, or set to \'false\', \'0\', \'off\', or similar, Git LFS will proceed as normal\. +.IP "\[ci]" 4 +\fBGIT_LFS_SET_LOCKABLE_READONLY\fR \fBlfs\.setlockablereadonly\fR +.IP +These settings, the first an environment variable and the second a gitconfig setting, control whether files marked as \'lockable\' in \fBgit lfs track\fR are made read\-only in the working copy when not locked by the current user\. The default is \fBtrue\fR; you can disable this behaviour and have all files writeable by setting either variable to 0, \'no\' or \'false\'\. +.IP "\[ci]" 4 +\fBlfs\.lockignoredfiles\fR +.IP +This setting controls whether Git LFS will set ignored files that match the lockable pattern read only as well as tracked files\. The default is \fBfalse\fR; you can enable this behavior by setting the variable to 1, \'yes\', or \'true\'\. +.IP "\[ci]" 4 +\fBlfs\.defaulttokenttl\fR +.IP +This setting sets a default token TTL when git\-lfs\-authenticate does not include the TTL in the JSON response but still enforces it\. +.IP +Note that this is only necessary for larger repositories hosted on LFS servers that don\'t include the TTL\. +.IP "" 0 +.SH "LFSCONFIG" +The \.lfsconfig file in a repository is read and interpreted in the same format as the file stored in \.git/config\. 
It allows a subset of keys to be used, including and limited to: +.IP "\[ci]" 4 +lfs\.allowincompletepush +.IP "\[ci]" 4 +lfs\.fetchexclude +.IP "\[ci]" 4 +lfs\.fetchinclude +.IP "\[ci]" 4 +lfs\.gitprotocol +.IP "\[ci]" 4 +lfs\.locksverify +.IP "\[ci]" 4 +lfs\.pushurl +.IP "\[ci]" 4 +lfs\.skipdownloaderrors +.IP "\[ci]" 4 +lfs\.url +.IP "\[ci]" 4 +lfs\.{*}\.access +.IP "\[ci]" 4 +remote\.{name}\.lfsurl +.IP "" 0 +.P +The set of keys allowed in this file is restricted for security reasons\. +.SH "EXAMPLES" +.TP +Configure a custom LFS endpoint for your repository: + +.P +\fBgit config \-f \.lfsconfig lfs\.url https://lfs\.example\.com/foo/bar/info/lfs\fR +.SH "SEE ALSO" +git\-config(1), git\-lfs\-install(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/git-lfs-linux-amd64-v3.2.0.tar.gz b/git-lfs-linux-amd64-v3.2.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7beccddf270c0ce5f1d14badf30521455823a423 --- /dev/null +++ b/git-lfs-linux-amd64-v3.2.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6730b8036d9d99f872752489a331995930fec17b61c87c7af1945c65a482a50 +size 4449523 diff --git a/image/axolotl-badge-web.png b/image/axolotl-badge-web.png new file mode 100644 index 0000000000000000000000000000000000000000..42217dca31ca758d822a97655350793fa4e4dc59 Binary files /dev/null and b/image/axolotl-badge-web.png differ diff --git a/image/axolotl.png b/image/axolotl.png new file mode 100644 index 0000000000000000000000000000000000000000..21c27db852b0c6e8e00d28a2ebe666f3663c5784 Binary files /dev/null and b/image/axolotl.png differ diff --git a/image/sticker_fixed.png b/image/sticker_fixed.png new file mode 100644 index 0000000000000000000000000000000000000000..0f5c67a1922120993bd350eb1213e2d331478b76 Binary files /dev/null and b/image/sticker_fixed.png differ diff --git a/index.qmd b/index.qmd new file mode 100644 index 0000000000000000000000000000000000000000..87d6858808ba7d07b0d190f57f0d328149b3576e --- /dev/null +++ b/index.qmd @@ -0,0 +1,19 @@ + + +```{python} +#|output: asis +#|echo: false + +# This cell steals the README as the home page for now, but excludes the table of contents (quarto adds its own) +import re +pattern = re.compile( + r"\s*\s*\s*\s*
\s*## Table of Contents.*?
", + re.DOTALL | re.IGNORECASE +) + +with open('README.md', 'r') as f: + txt = f.read() + +cleaned = pattern.sub("", txt) +print(cleaned) +``` diff --git a/mistral-config.yml b/mistral-config.yml new file mode 100644 index 0000000000000000000000000000000000000000..55754737b9159a13e53dbc55dffb00a6f1f36778 --- /dev/null +++ b/mistral-config.yml @@ -0,0 +1,75 @@ +#Mistral-7b-Instruct-v0.2 +base_model: mistralai/Mistral-7B-Instruct-v0.2 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer +is_mistral_derived_model: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: menna/tutor-synthetic + type: completion + field: text +dataset_prepared_path: last_run_prepared +hub_model_id: menna/mistral-synthetic-tutor +val_set_size: 0.05 +output_dir: ./out + +adapter: lora +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj + +sequence_len: 512 +sample_packing: false +pad_to_sequence_len: true + +wandb_project: mistral-synthetic-tutor +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: checkpoint + +#only 2 epochs because of small dataset +gradient_accumulation_steps: 3 +micro_batch_size: 2 +num_epochs: 3 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +chat_template: inst + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_table_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" diff --git a/out/README.md b/out/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dbd433284f894bf2f6173494a57a0a558c47a050 --- /dev/null +++ b/out/README.md @@ -0,0 +1,162 @@ +--- +license: apache-2.0 +library_name: peft +tags: +- axolotl +- generated_from_trainer +base_model: mistralai/Mistral-7B-Instruct-v0.2 +model-index: +- name: mistral-synthetic-tutor-v2 + results: [] +--- + + + +[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) +
See axolotl config + +axolotl version: `0.4.0` +```yaml +#Mistral-7b-Instruct-v0.2 +base_model: mistralai/Mistral-7B-Instruct-v0.2 +model_type: MistralForCausalLM +tokenizer_type: LlamaTokenizer +is_mistral_derived_model: true + +load_in_8bit: true +load_in_4bit: false +strict: false + +datasets: + - path: menna/tutor-synthetic-v2 + type: completion + field: text +dataset_prepared_path: last_run_prepared +hub_model_id: menna/mistral-synthetic-tutor-v2 +val_set_size: 0.05 +output_dir: ./out + +adapter: lora +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_modules: + - q_proj + - v_proj + +sequence_len: 512 +sample_packing: false +pad_to_sequence_len: true + +wandb_project: mistral-synthetic-tutor-v2 +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: checkpoint + +#only 2 epochs because of small dataset +gradient_accumulation_steps: 3 +micro_batch_size: 2 +num_epochs: 3 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +train_on_inputs: false +group_by_length: false +bf16: true +fp16: false +tf32: false + +chat_template: inst + +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: false + +warmup_steps: 10 +evals_per_epoch: 4 +eval_table_size: +eval_table_max_new_tokens: 128 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.0 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" + +``` + +
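
The `lora_*` keys in the config above describe a LoRA adapter applied to the attention projections, with an effective train batch size of micro_batch_size (2) × gradient_accumulation_steps (3) × 2 GPUs = 12, matching the hyperparameters reported further down this card. As a rough, illustrative sketch only (Axolotl constructs the adapter itself from the YAML; this is not the training code), the same adapter settings expressed directly with the `peft` library would look like the following. It assumes `bitsandbytes` is installed for 8-bit loading; newer `transformers` releases prefer passing a `BitsAndBytesConfig` instead of `load_in_8bit=True`.

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# Mirrors `base_model` and `load_in_8bit: true` from the YAML above.
base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    load_in_8bit=True,
)

# Mirrors the lora_* keys: rank 8, alpha 16, dropout 0.05, q/v projections only.
lora = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

model = get_peft_model(base, lora)
model.print_trainable_parameters()  # only the small adapter matrices are trainable
```

Restricting the adapter to `q_proj` and `v_proj` at rank 8 keeps the trainable parameter count (and the resulting `adapter_model.safetensors`, ~13 MB in this repo) small relative to the 7B base model.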

+ +# mistral-synthetic-tutor-v2 + +This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the None dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4684 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0002 +- train_batch_size: 2 +- eval_batch_size: 2 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 2 +- gradient_accumulation_steps: 3 +- total_train_batch_size: 12 +- total_eval_batch_size: 4 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 10 +- num_epochs: 3 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 3.2348 | 0.01 | 1 | 3.0256 | +| 1.2936 | 0.25 | 29 | 1.2551 | +| 0.9394 | 0.5 | 58 | 0.7861 | +| 0.6422 | 0.75 | 87 | 0.6252 | +| 0.4316 | 1.01 | 116 | 0.5628 | +| 0.4754 | 1.26 | 145 | 0.5256 | +| 0.4847 | 1.51 | 174 | 0.5035 | +| 0.4473 | 1.76 | 203 | 0.4886 | +| 0.5926 | 2.01 | 232 | 0.4762 | +| 0.322 | 2.26 | 261 | 0.4725 | +| 0.8129 | 2.51 | 290 | 0.4694 | +| 0.3698 | 2.77 | 319 | 0.4684 | + + +### Framework versions + +- PEFT 0.9.0 +- Transformers 4.40.0.dev0 +- Pytorch 2.2.1+cu121 +- Datasets 2.18.0 +- Tokenizers 0.15.0 \ No newline at end of file diff --git a/out/adapter_config.json b/out/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b68237c5b29f907e3cc47e1a1c75ae55cd0de55 --- /dev/null +++ b/out/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/out/adapter_model.bin b/out/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0024a0c79b3312dd826626b22b6a911a1c57ab11 --- /dev/null +++ b/out/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c66bc5cb9a034ad1405dc0cb820ef20e804b11604580782e5f9513b3c74874f3 +size 13677706 diff --git a/out/adapter_model.safetensors b/out/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e1d9efb1165df78cbac61a27425e92c6201671f --- /dev/null +++ b/out/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e70061bca2c882df4379f2020bc2978d21cbe4015353e0ad37cc9fe032a4bf9 +size 13648432 diff --git a/out/checkpoint-126/README.md b/out/checkpoint-126/README.md new file mode 100644 index 0000000000000000000000000000000000000000..09b95f7bb1935f89566de0ae0601274fa95c48f2 --- /dev/null +++ b/out/checkpoint-126/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: 
mistralai/Mistral-7B-Instruct-v0.2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.9.0 \ No newline at end of file diff --git a/out/checkpoint-126/adapter_config.json b/out/checkpoint-126/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e08a39f33a0493676dddce78bf8ad487f547c451 --- /dev/null +++ b/out/checkpoint-126/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/out/checkpoint-126/adapter_model.safetensors b/out/checkpoint-126/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d2f7b6332d29985c920e3b344aeaf5a5146fb3a --- /dev/null +++ b/out/checkpoint-126/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdd0e1db639d8f0e28165756fc416b875b0437a826eecb0de9ffa53ecee782bb +size 13648432 diff --git a/out/checkpoint-126/optimizer.pt b/out/checkpoint-126/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..abb2c41840d14641b1cc706538423cdbfaa6293a --- /dev/null +++ b/out/checkpoint-126/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1161bb7f7b67cfc80c14413646c9bacafedbd1cd1d4dffd1011430718e02c1e +size 6979130 diff --git a/out/checkpoint-126/rng_state_0.pth b/out/checkpoint-126/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..632b31074c2b16bdc38a56901612b3af26ac7ad1 --- /dev/null +++ b/out/checkpoint-126/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac697d2a1d3e589f92d9793211c1bf973b0b5cc34a14179aaa284f634df72801 +size 14512 diff --git a/out/checkpoint-126/rng_state_1.pth b/out/checkpoint-126/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..9edae2550f5007b7f5a818666dcd12878b048b12 --- /dev/null +++ b/out/checkpoint-126/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf54aec69c649e4af3a67e5966d57a42f14f7ee5943b542ced7ce50d20df222 +size 14512 diff --git a/out/checkpoint-126/scheduler.pt 
b/out/checkpoint-126/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..301df21c618cb7319562931666cd939e0d12b8c9 --- /dev/null +++ b/out/checkpoint-126/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaacbd7832b1d89b3407b4546fc0a01a2b4702ab75915a3dce9d236bfbf9e372 +size 1064 diff --git a/out/checkpoint-126/special_tokens_map.json b/out/checkpoint-126/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/checkpoint-126/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "
", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-126/tokenizer.model b/out/checkpoint-126/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/checkpoint-126/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/checkpoint-126/tokenizer_config.json b/out/checkpoint-126/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/checkpoint-126/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "
", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/checkpoint-126/trainer_state.json b/out/checkpoint-126/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3c88ca5e34972b0e862bf1e250917a09e061d000 --- /dev/null +++ b/out/checkpoint-126/trainer_state.json @@ -0,0 +1,967 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9894736842105263, + "eval_steps": 16, + "global_step": 126, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 5.992403507232666, + "learning_rate": 2e-05, + "loss": 4.1309, + "step": 1 + }, + { + "epoch": 0.02, + "eval_loss": 4.389205455780029, + "eval_runtime": 5.2346, + "eval_samples_per_second": 7.641, + "eval_steps_per_second": 1.91, + "step": 1 + }, + { + "epoch": 0.03, + "grad_norm": 5.414330959320068, + "learning_rate": 4e-05, + "loss": 3.9481, + "step": 2 + }, + { + "epoch": 0.05, + "grad_norm": 5.4069437980651855, + "learning_rate": 6e-05, + "loss": 3.6597, + "step": 3 + }, + { + "epoch": 0.06, + "grad_norm": 6.113742351531982, + "learning_rate": 8e-05, + "loss": 4.1631, + "step": 4 + }, + { + "epoch": 0.08, + "grad_norm": 5.780997276306152, + "learning_rate": 0.0001, + "loss": 3.807, + "step": 5 + }, + { + "epoch": 0.09, + "grad_norm": 6.321331977844238, + "learning_rate": 0.00012, + "loss": 3.6, + "step": 6 + }, + { + "epoch": 0.11, + "grad_norm": 6.7149882316589355, + "learning_rate": 0.00014, + "loss": 3.3887, + "step": 7 + }, + { + "epoch": 0.13, + "grad_norm": 6.037610054016113, + "learning_rate": 0.00016, + "loss": 3.0883, + "step": 8 + }, + { + "epoch": 0.14, + "grad_norm": 4.761746406555176, + "learning_rate": 0.00018, + "loss": 2.7721, + "step": 9 + }, + { + "epoch": 0.16, + "grad_norm": 3.740077495574951, + "learning_rate": 0.0002, + "loss": 2.4759, + "step": 10 + }, + { + "epoch": 0.17, + "grad_norm": 3.947979211807251, + "learning_rate": 0.00019998459887166634, + "loss": 2.3599, + "step": 11 + }, + { + "epoch": 0.19, + "grad_norm": 4.427234649658203, + "learning_rate": 0.00019993840023056043, + "loss": 2.2456, + "step": 12 + }, + { + "epoch": 0.21, + "grad_norm": 5.512731075286865, + "learning_rate": 0.00019986141830690625, + "loss": 2.0614, + "step": 13 + }, + { + "epoch": 0.22, + "grad_norm": 3.203895330429077, + "learning_rate": 0.00019975367681287356, + "loss": 1.6209, + "step": 14 + }, + { + "epoch": 0.24, + "grad_norm": 4.397416114807129, + "learning_rate": 0.00019961520893527383, + "loss": 2.6881, + "step": 15 + }, + { + "epoch": 0.25, + "grad_norm": 2.3654613494873047, + "learning_rate": 0.00019944605732533818, + "loss": 1.8533, + "step": 16 + }, + { + "epoch": 0.25, + "eval_loss": 1.8247573375701904, + "eval_runtime": 5.1787, + "eval_samples_per_second": 7.724, + "eval_steps_per_second": 1.931, + "step": 16 + }, + { + "epoch": 0.27, + "grad_norm": 2.5308055877685547, + "learning_rate": 0.00019924627408557963, + "loss": 1.9051, + "step": 17 + }, + { + "epoch": 0.28, + "grad_norm": 1.7537152767181396, + "learning_rate": 0.00019901592075374447, + "loss": 1.4936, + "step": 18 + }, + { + "epoch": 0.3, + "grad_norm": 1.7097824811935425, + "learning_rate": 0.00019875506828385722, + "loss": 1.6754, + "step": 19 + }, + { + "epoch": 0.32, + "grad_norm": 1.6216169595718384, + "learning_rate": 0.00019846379702436517, + "loss": 1.5318, + 
"step": 20 + }, + { + "epoch": 0.33, + "grad_norm": 1.5100860595703125, + "learning_rate": 0.00019814219669338928, + "loss": 1.6054, + "step": 21 + }, + { + "epoch": 0.35, + "grad_norm": 1.1800858974456787, + "learning_rate": 0.0001977903663510889, + "loss": 1.4081, + "step": 22 + }, + { + "epoch": 0.36, + "grad_norm": 1.3764004707336426, + "learning_rate": 0.00019740841436914917, + "loss": 1.5684, + "step": 23 + }, + { + "epoch": 0.38, + "grad_norm": 1.5436979532241821, + "learning_rate": 0.00019699645839739985, + "loss": 1.5433, + "step": 24 + }, + { + "epoch": 0.39, + "grad_norm": 1.2957199811935425, + "learning_rate": 0.00019655462532757676, + "loss": 1.3984, + "step": 25 + }, + { + "epoch": 0.41, + "grad_norm": 1.5687752962112427, + "learning_rate": 0.00019608305125423607, + "loss": 1.4136, + "step": 26 + }, + { + "epoch": 0.43, + "grad_norm": 2.148899793624878, + "learning_rate": 0.00019558188143283426, + "loss": 1.7407, + "step": 27 + }, + { + "epoch": 0.44, + "grad_norm": 1.304473638534546, + "learning_rate": 0.000195051270234986, + "loss": 1.3542, + "step": 28 + }, + { + "epoch": 0.46, + "grad_norm": 1.2896654605865479, + "learning_rate": 0.00019449138110091445, + "loss": 1.317, + "step": 29 + }, + { + "epoch": 0.47, + "grad_norm": 1.1432321071624756, + "learning_rate": 0.00019390238648910765, + "loss": 1.3113, + "step": 30 + }, + { + "epoch": 0.49, + "grad_norm": 1.5278006792068481, + "learning_rate": 0.00019328446782319772, + "loss": 1.4822, + "step": 31 + }, + { + "epoch": 0.51, + "grad_norm": 1.0685549974441528, + "learning_rate": 0.00019263781543607815, + "loss": 1.3071, + "step": 32 + }, + { + "epoch": 0.51, + "eval_loss": 1.316111445426941, + "eval_runtime": 5.1846, + "eval_samples_per_second": 7.715, + "eval_steps_per_second": 1.929, + "step": 32 + }, + { + "epoch": 0.52, + "grad_norm": 1.0776275396347046, + "learning_rate": 0.00019196262851127693, + "loss": 1.2977, + "step": 33 + }, + { + "epoch": 0.54, + "grad_norm": 1.4236021041870117, + "learning_rate": 0.00019125911502160364, + "loss": 1.3431, + "step": 34 + }, + { + "epoch": 0.55, + "grad_norm": 1.2043373584747314, + "learning_rate": 0.00019052749166508908, + "loss": 1.1973, + "step": 35 + }, + { + "epoch": 0.57, + "grad_norm": 1.4314641952514648, + "learning_rate": 0.0001897679837982373, + "loss": 1.2738, + "step": 36 + }, + { + "epoch": 0.58, + "grad_norm": 1.1113038063049316, + "learning_rate": 0.00018898082536661094, + "loss": 1.1974, + "step": 37 + }, + { + "epoch": 0.6, + "grad_norm": 1.3298149108886719, + "learning_rate": 0.00018816625883277043, + "loss": 1.2274, + "step": 38 + }, + { + "epoch": 0.62, + "grad_norm": 1.2348356246948242, + "learning_rate": 0.00018732453510159026, + "loss": 1.2461, + "step": 39 + }, + { + "epoch": 0.63, + "grad_norm": 1.0232346057891846, + "learning_rate": 0.0001864559134429745, + "loss": 1.166, + "step": 40 + }, + { + "epoch": 0.65, + "grad_norm": 1.1535675525665283, + "learning_rate": 0.000185560661411996, + "loss": 1.1162, + "step": 41 + }, + { + "epoch": 0.66, + "grad_norm": 1.2263463735580444, + "learning_rate": 0.00018463905476648307, + "loss": 1.1999, + "step": 42 + }, + { + "epoch": 0.68, + "grad_norm": 1.36984121799469, + "learning_rate": 0.0001836913773820802, + "loss": 1.2676, + "step": 43 + }, + { + "epoch": 0.69, + "grad_norm": 1.1568756103515625, + "learning_rate": 0.00018271792116480765, + "loss": 1.2012, + "step": 44 + }, + { + "epoch": 0.71, + "grad_norm": 1.0802751779556274, + "learning_rate": 0.00018171898596114805, + "loss": 1.1696, + "step": 45 + }, + { + 
"epoch": 0.73, + "grad_norm": 1.0523571968078613, + "learning_rate": 0.00018069487946568673, + "loss": 1.1398, + "step": 46 + }, + { + "epoch": 0.74, + "grad_norm": 1.1900922060012817, + "learning_rate": 0.00017964591712633498, + "loss": 1.2722, + "step": 47 + }, + { + "epoch": 0.76, + "grad_norm": 1.5743844509124756, + "learning_rate": 0.00017857242204716495, + "loss": 1.2833, + "step": 48 + }, + { + "epoch": 0.76, + "eval_loss": 1.1948559284210205, + "eval_runtime": 5.2088, + "eval_samples_per_second": 7.679, + "eval_steps_per_second": 1.92, + "step": 48 + }, + { + "epoch": 0.77, + "grad_norm": 1.3471274375915527, + "learning_rate": 0.00017747472488888622, + "loss": 1.2784, + "step": 49 + }, + { + "epoch": 0.79, + "grad_norm": 1.9713571071624756, + "learning_rate": 0.0001763531637669949, + "loss": 1.5447, + "step": 50 + }, + { + "epoch": 0.81, + "grad_norm": 1.1678998470306396, + "learning_rate": 0.00017520808414762641, + "loss": 1.1509, + "step": 51 + }, + { + "epoch": 0.82, + "grad_norm": 1.033021330833435, + "learning_rate": 0.0001740398387411442, + "loss": 1.0539, + "step": 52 + }, + { + "epoch": 0.84, + "grad_norm": 1.1294852495193481, + "learning_rate": 0.0001728487873934969, + "loss": 1.1355, + "step": 53 + }, + { + "epoch": 0.85, + "grad_norm": 1.2494419813156128, + "learning_rate": 0.00017163529697537754, + "loss": 1.1848, + "step": 54 + }, + { + "epoch": 0.87, + "grad_norm": 2.5106189250946045, + "learning_rate": 0.00017039974126921946, + "loss": 1.1048, + "step": 55 + }, + { + "epoch": 0.88, + "grad_norm": 0.9684445858001709, + "learning_rate": 0.0001691425008540625, + "loss": 1.0889, + "step": 56 + }, + { + "epoch": 0.9, + "grad_norm": 1.2388131618499756, + "learning_rate": 0.00016786396298832624, + "loss": 1.1113, + "step": 57 + }, + { + "epoch": 0.92, + "grad_norm": 1.2487854957580566, + "learning_rate": 0.0001665645214905257, + "loss": 1.1717, + "step": 58 + }, + { + "epoch": 0.93, + "grad_norm": 1.1480296850204468, + "learning_rate": 0.00016524457661796626, + "loss": 1.0576, + "step": 59 + }, + { + "epoch": 0.95, + "grad_norm": 1.1685549020767212, + "learning_rate": 0.0001639045349434554, + "loss": 1.1172, + "step": 60 + }, + { + "epoch": 0.96, + "grad_norm": 1.1080901622772217, + "learning_rate": 0.00016254480923006925, + "loss": 1.0975, + "step": 61 + }, + { + "epoch": 0.98, + "grad_norm": 1.9709242582321167, + "learning_rate": 0.00016116581830401193, + "loss": 1.1182, + "step": 62 + }, + { + "epoch": 0.99, + "grad_norm": 1.3067880868911743, + "learning_rate": 0.00015976798692560795, + "loss": 1.0966, + "step": 63 + }, + { + "epoch": 1.01, + "grad_norm": 1.0896201133728027, + "learning_rate": 0.00015835174565846622, + "loss": 1.1168, + "step": 64 + }, + { + "epoch": 1.01, + "eval_loss": 1.1173784732818604, + "eval_runtime": 5.2256, + "eval_samples_per_second": 7.655, + "eval_steps_per_second": 1.914, + "step": 64 + }, + { + "epoch": 1.03, + "grad_norm": 1.3303048610687256, + "learning_rate": 0.00015691753073685693, + "loss": 1.1814, + "step": 65 + }, + { + "epoch": 1.04, + "grad_norm": 1.2705278396606445, + "learning_rate": 0.0001554657839313413, + "loss": 1.0399, + "step": 66 + }, + { + "epoch": 1.06, + "grad_norm": 1.3033435344696045, + "learning_rate": 0.00015399695241269666, + "loss": 1.0667, + "step": 67 + }, + { + "epoch": 1.07, + "grad_norm": 1.1935135126113892, + "learning_rate": 0.00015251148861417733, + "loss": 1.0481, + "step": 68 + }, + { + "epoch": 1.09, + "grad_norm": 1.202915906906128, + "learning_rate": 0.0001510098500921552, + "loss": 1.0197, + "step": 
69 + }, + { + "epoch": 1.11, + "grad_norm": 1.1750216484069824, + "learning_rate": 0.000149492499385182, + "loss": 1.0542, + "step": 70 + }, + { + "epoch": 1.12, + "grad_norm": 1.2564030885696411, + "learning_rate": 0.00014795990387151718, + "loss": 1.105, + "step": 71 + }, + { + "epoch": 1.14, + "grad_norm": 1.5712127685546875, + "learning_rate": 0.0001464125356251644, + "loss": 1.1838, + "step": 72 + }, + { + "epoch": 1.15, + "grad_norm": 1.232422947883606, + "learning_rate": 0.00014485087127046254, + "loss": 1.0357, + "step": 73 + }, + { + "epoch": 1.17, + "grad_norm": 1.5472371578216553, + "learning_rate": 0.00014327539183527446, + "loss": 1.1008, + "step": 74 + }, + { + "epoch": 1.18, + "grad_norm": 1.2168437242507935, + "learning_rate": 0.00014168658260281945, + "loss": 0.999, + "step": 75 + }, + { + "epoch": 1.2, + "grad_norm": 1.2842482328414917, + "learning_rate": 0.0001400849329621953, + "loss": 1.0174, + "step": 76 + }, + { + "epoch": 1.22, + "grad_norm": 1.3613685369491577, + "learning_rate": 0.00013847093625763516, + "loss": 1.0749, + "step": 77 + }, + { + "epoch": 1.23, + "grad_norm": 1.313064694404602, + "learning_rate": 0.0001368450896365467, + "loss": 1.0647, + "step": 78 + }, + { + "epoch": 1.25, + "grad_norm": 1.2898201942443848, + "learning_rate": 0.000135207893896379, + "loss": 0.9813, + "step": 79 + }, + { + "epoch": 1.26, + "grad_norm": 1.3023165464401245, + "learning_rate": 0.0001335598533303662, + "loss": 1.0158, + "step": 80 + }, + { + "epoch": 1.26, + "eval_loss": 1.0760670900344849, + "eval_runtime": 5.2422, + "eval_samples_per_second": 7.63, + "eval_steps_per_second": 1.908, + "step": 80 + }, + { + "epoch": 1.28, + "grad_norm": 1.3555819988250732, + "learning_rate": 0.00013190147557219338, + "loss": 1.1006, + "step": 81 + }, + { + "epoch": 1.29, + "grad_norm": 1.4895110130310059, + "learning_rate": 0.00013023327143963414, + "loss": 1.0924, + "step": 82 + }, + { + "epoch": 1.31, + "grad_norm": 1.3837958574295044, + "learning_rate": 0.0001285557547772072, + "loss": 1.0098, + "step": 83 + }, + { + "epoch": 1.33, + "grad_norm": 1.3755472898483276, + "learning_rate": 0.00012686944229790042, + "loss": 1.0196, + "step": 84 + }, + { + "epoch": 1.34, + "grad_norm": 1.115247368812561, + "learning_rate": 0.00012517485342401202, + "loss": 0.9746, + "step": 85 + }, + { + "epoch": 1.36, + "grad_norm": 1.2877663373947144, + "learning_rate": 0.00012347251012715627, + "loss": 1.0134, + "step": 86 + }, + { + "epoch": 1.37, + "grad_norm": 1.182273507118225, + "learning_rate": 0.00012176293676748493, + "loss": 1.0437, + "step": 87 + }, + { + "epoch": 1.39, + "grad_norm": 1.2405059337615967, + "learning_rate": 0.0001200466599321721, + "loss": 0.9615, + "step": 88 + }, + { + "epoch": 1.41, + "grad_norm": 1.3767961263656616, + "learning_rate": 0.00011832420827321373, + "loss": 1.0392, + "step": 89 + }, + { + "epoch": 1.42, + "grad_norm": 1.2421908378601074, + "learning_rate": 0.0001165961123445908, + "loss": 1.0476, + "step": 90 + }, + { + "epoch": 1.44, + "grad_norm": 1.2321393489837646, + "learning_rate": 0.00011486290443884666, + "loss": 0.9779, + "step": 91 + }, + { + "epoch": 1.45, + "grad_norm": 1.3787308931350708, + "learning_rate": 0.00011312511842312909, + "loss": 0.989, + "step": 92 + }, + { + "epoch": 1.47, + "grad_norm": 1.233562707901001, + "learning_rate": 0.00011138328957474691, + "loss": 0.9887, + "step": 93 + }, + { + "epoch": 1.48, + "grad_norm": 1.2139899730682373, + "learning_rate": 0.00010963795441629274, + "loss": 0.9525, + "step": 94 + }, + { + "epoch": 1.5, + 
"grad_norm": 1.5555188655853271, + "learning_rate": 0.00010788965055038178, + "loss": 1.0339, + "step": 95 + }, + { + "epoch": 1.52, + "grad_norm": 1.7850981950759888, + "learning_rate": 0.00010613891649405815, + "loss": 1.1342, + "step": 96 + }, + { + "epoch": 1.52, + "eval_loss": 1.0439285039901733, + "eval_runtime": 5.2194, + "eval_samples_per_second": 7.664, + "eval_steps_per_second": 1.916, + "step": 96 + }, + { + "epoch": 1.53, + "grad_norm": 1.3491744995117188, + "learning_rate": 0.00010438629151291943, + "loss": 1.0061, + "step": 97 + }, + { + "epoch": 1.55, + "grad_norm": 1.359479308128357, + "learning_rate": 0.00010263231545501068, + "loss": 0.9811, + "step": 98 + }, + { + "epoch": 1.56, + "grad_norm": 1.1872361898422241, + "learning_rate": 0.00010087752858453923, + "loss": 0.9766, + "step": 99 + }, + { + "epoch": 1.58, + "grad_norm": 1.2711139917373657, + "learning_rate": 9.912247141546079e-05, + "loss": 1.0206, + "step": 100 + }, + { + "epoch": 1.59, + "grad_norm": 1.5995763540267944, + "learning_rate": 9.736768454498936e-05, + "loss": 1.0461, + "step": 101 + }, + { + "epoch": 1.61, + "grad_norm": 2.560168743133545, + "learning_rate": 9.56137084870806e-05, + "loss": 1.1224, + "step": 102 + }, + { + "epoch": 1.63, + "grad_norm": 1.3833796977996826, + "learning_rate": 9.38610835059419e-05, + "loss": 1.063, + "step": 103 + }, + { + "epoch": 1.64, + "grad_norm": 1.840963363647461, + "learning_rate": 9.211034944961823e-05, + "loss": 1.1159, + "step": 104 + }, + { + "epoch": 1.66, + "grad_norm": 1.2169251441955566, + "learning_rate": 9.036204558370725e-05, + "loss": 0.9595, + "step": 105 + }, + { + "epoch": 1.67, + "grad_norm": 1.2543871402740479, + "learning_rate": 8.861671042525311e-05, + "loss": 1.0113, + "step": 106 + }, + { + "epoch": 1.69, + "grad_norm": 1.2910866737365723, + "learning_rate": 8.68748815768709e-05, + "loss": 0.9819, + "step": 107 + }, + { + "epoch": 1.71, + "grad_norm": 1.3683500289916992, + "learning_rate": 8.513709556115335e-05, + "loss": 0.9015, + "step": 108 + }, + { + "epoch": 1.72, + "grad_norm": 1.5607995986938477, + "learning_rate": 8.340388765540923e-05, + "loss": 1.0256, + "step": 109 + }, + { + "epoch": 1.74, + "grad_norm": 1.6682592630386353, + "learning_rate": 8.16757917267863e-05, + "loss": 1.0977, + "step": 110 + }, + { + "epoch": 1.75, + "grad_norm": 1.2137242555618286, + "learning_rate": 7.995334006782793e-05, + "loss": 0.9663, + "step": 111 + }, + { + "epoch": 1.77, + "grad_norm": 1.2425730228424072, + "learning_rate": 7.823706323251512e-05, + "loss": 0.955, + "step": 112 + }, + { + "epoch": 1.77, + "eval_loss": 1.0228078365325928, + "eval_runtime": 5.2561, + "eval_samples_per_second": 7.61, + "eval_steps_per_second": 1.903, + "step": 112 + }, + { + "epoch": 1.78, + "grad_norm": 1.415521502494812, + "learning_rate": 7.652748987284375e-05, + "loss": 1.037, + "step": 113 + }, + { + "epoch": 1.8, + "grad_norm": 1.2909739017486572, + "learning_rate": 7.482514657598799e-05, + "loss": 0.8776, + "step": 114 + }, + { + "epoch": 1.82, + "grad_norm": 1.7562286853790283, + "learning_rate": 7.31305577020996e-05, + "loss": 0.9811, + "step": 115 + }, + { + "epoch": 1.83, + "grad_norm": 1.5445969104766846, + "learning_rate": 7.144424522279283e-05, + "loss": 0.9487, + "step": 116 + }, + { + "epoch": 1.85, + "grad_norm": 1.475395679473877, + "learning_rate": 6.976672856036585e-05, + "loss": 1.0024, + "step": 117 + }, + { + "epoch": 1.86, + "grad_norm": 1.3962652683258057, + "learning_rate": 6.809852442780664e-05, + "loss": 0.9585, + "step": 118 + }, + { + 
"epoch": 1.88, + "grad_norm": 1.430710792541504, + "learning_rate": 6.644014666963386e-05, + "loss": 1.0057, + "step": 119 + }, + { + "epoch": 1.89, + "grad_norm": 1.3526166677474976, + "learning_rate": 6.479210610362103e-05, + "loss": 0.9973, + "step": 120 + }, + { + "epoch": 1.91, + "grad_norm": 1.506891131401062, + "learning_rate": 6.315491036345338e-05, + "loss": 0.9622, + "step": 121 + }, + { + "epoch": 1.93, + "grad_norm": 1.2012327909469604, + "learning_rate": 6.152906374236485e-05, + "loss": 0.9271, + "step": 122 + }, + { + "epoch": 1.94, + "grad_norm": 1.6816527843475342, + "learning_rate": 5.991506703780475e-05, + "loss": 1.0185, + "step": 123 + }, + { + "epoch": 1.96, + "grad_norm": 1.5374231338500977, + "learning_rate": 5.831341739718055e-05, + "loss": 0.9864, + "step": 124 + }, + { + "epoch": 1.97, + "grad_norm": 1.3972766399383545, + "learning_rate": 5.672460816472556e-05, + "loss": 0.9894, + "step": 125 + }, + { + "epoch": 1.99, + "grad_norm": 1.3701297044754028, + "learning_rate": 5.5149128729537457e-05, + "loss": 0.9733, + "step": 126 + } + ], + "logging_steps": 1, + "max_steps": 189, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 63, + "total_flos": 3.304387819811635e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-126/training_args.bin b/out/checkpoint-126/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47200fcd99a14764a486a432d79a2faffdf139d3 --- /dev/null +++ b/out/checkpoint-126/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c270bfcfa5d2bb7a14a890f8d6ed0bdf6d120bfca89bef54d9b15f8b78d675e4 +size 5688 diff --git a/out/checkpoint-189/README.md b/out/checkpoint-189/README.md new file mode 100644 index 0000000000000000000000000000000000000000..09b95f7bb1935f89566de0ae0601274fa95c48f2 --- /dev/null +++ b/out/checkpoint-189/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-Instruct-v0.2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.9.0 \ No newline at end of file diff --git a/out/checkpoint-189/adapter_config.json b/out/checkpoint-189/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e08a39f33a0493676dddce78bf8ad487f547c451 --- /dev/null +++ b/out/checkpoint-189/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/out/checkpoint-189/adapter_model.safetensors b/out/checkpoint-189/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ec5dcc6586b023bb072dafa28b94a1f318a9d41 --- /dev/null +++ b/out/checkpoint-189/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492a3be3c6da61d46af905ed73825d5632fd30af5297d441b4216d3c26047e08 +size 13648432 diff --git a/out/checkpoint-189/optimizer.pt b/out/checkpoint-189/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc6eb1506a76e4f5b5b54b829a836c4420b96c44 --- /dev/null +++ b/out/checkpoint-189/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f88a9b87b03ab7e5aafc85fc792795e9d835e87ec11eb58f6930148ec18b0b73 +size 6979130 diff --git a/out/checkpoint-189/rng_state_0.pth b/out/checkpoint-189/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3ccea23d079b1895f067689f230ad1d95920fa4 --- /dev/null +++ b/out/checkpoint-189/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89c5c0dea912ac4dd8b3b53361ecb18b660bef15a86063508f6af16b491ad31b +size 14512 diff --git a/out/checkpoint-189/rng_state_1.pth b/out/checkpoint-189/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..959cb96b6e0e8b74f7662b37671644706db0629a --- /dev/null +++ b/out/checkpoint-189/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e0c8090cbdae285d03cdc20a293fb32caf8d28e10346458883ccc4271f84bc +size 14512 diff --git a/out/checkpoint-189/scheduler.pt b/out/checkpoint-189/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f54608eb0ed4f09ec2243477d652cbe58ab3d484 --- /dev/null +++ b/out/checkpoint-189/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:399c207d5814a42f8b20e25df3fedb22c4b1c005b6a9c0d1622291adf032b026 +size 1064 diff --git a/out/checkpoint-189/special_tokens_map.json b/out/checkpoint-189/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/checkpoint-189/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-189/tokenizer.model b/out/checkpoint-189/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/checkpoint-189/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/checkpoint-189/tokenizer_config.json b/out/checkpoint-189/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/checkpoint-189/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 
'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/checkpoint-189/trainer_state.json b/out/checkpoint-189/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..06f156e38366a45bb81fb81f019dfd9c1afac08e --- /dev/null +++ b/out/checkpoint-189/trainer_state.json @@ -0,0 +1,1440 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.984210526315789, + "eval_steps": 16, + "global_step": 189, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "grad_norm": 5.992403507232666, + "learning_rate": 2e-05, + "loss": 4.1309, + "step": 1 + }, + { + "epoch": 0.02, + "eval_loss": 4.389205455780029, + "eval_runtime": 5.2346, + "eval_samples_per_second": 7.641, + "eval_steps_per_second": 1.91, + "step": 1 + }, + { + "epoch": 0.03, + "grad_norm": 5.414330959320068, + "learning_rate": 4e-05, + "loss": 3.9481, + "step": 2 + }, + { + "epoch": 0.05, + "grad_norm": 5.4069437980651855, + "learning_rate": 6e-05, + "loss": 3.6597, + "step": 3 + }, + { + "epoch": 0.06, + "grad_norm": 6.113742351531982, + "learning_rate": 8e-05, + "loss": 4.1631, + "step": 4 + }, + { + "epoch": 0.08, + "grad_norm": 5.780997276306152, + "learning_rate": 0.0001, + "loss": 3.807, + "step": 5 + }, + { + "epoch": 0.09, + "grad_norm": 6.321331977844238, + "learning_rate": 0.00012, + "loss": 3.6, + "step": 6 + }, + { + "epoch": 0.11, + "grad_norm": 6.7149882316589355, + "learning_rate": 0.00014, + "loss": 3.3887, + "step": 7 + }, + { + "epoch": 0.13, + "grad_norm": 6.037610054016113, + "learning_rate": 0.00016, + "loss": 3.0883, + "step": 8 + }, + { + "epoch": 0.14, + "grad_norm": 4.761746406555176, + "learning_rate": 0.00018, + "loss": 2.7721, + "step": 9 + }, + { + "epoch": 0.16, + "grad_norm": 3.740077495574951, + "learning_rate": 0.0002, + "loss": 2.4759, + "step": 10 + }, + { + "epoch": 0.17, + "grad_norm": 3.947979211807251, + "learning_rate": 0.00019998459887166634, + "loss": 2.3599, + "step": 11 + }, + { + "epoch": 0.19, + "grad_norm": 4.427234649658203, + "learning_rate": 0.00019993840023056043, + "loss": 2.2456, + "step": 12 + }, + { + "epoch": 0.21, + "grad_norm": 5.512731075286865, + "learning_rate": 0.00019986141830690625, + "loss": 2.0614, + "step": 13 + }, + { + "epoch": 0.22, + "grad_norm": 3.203895330429077, + "learning_rate": 0.00019975367681287356, + "loss": 1.6209, + "step": 14 + }, + { + "epoch": 0.24, + "grad_norm": 4.397416114807129, + "learning_rate": 0.00019961520893527383, + "loss": 2.6881, + "step": 15 + }, + { + "epoch": 0.25, + "grad_norm": 2.3654613494873047, + "learning_rate": 0.00019944605732533818, + "loss": 1.8533, + "step": 16 + }, + { + "epoch": 0.25, + "eval_loss": 1.8247573375701904, + "eval_runtime": 5.1787, + "eval_samples_per_second": 7.724, + "eval_steps_per_second": 1.931, + "step": 16 + }, + { + "epoch": 0.27, + "grad_norm": 2.5308055877685547, + "learning_rate": 0.00019924627408557963, + "loss": 1.9051, + "step": 17 + }, + { + "epoch": 0.28, + "grad_norm": 1.7537152767181396, + "learning_rate": 
0.00019901592075374447, + "loss": 1.4936, + "step": 18 + }, + { + "epoch": 0.3, + "grad_norm": 1.7097824811935425, + "learning_rate": 0.00019875506828385722, + "loss": 1.6754, + "step": 19 + }, + { + "epoch": 0.32, + "grad_norm": 1.6216169595718384, + "learning_rate": 0.00019846379702436517, + "loss": 1.5318, + "step": 20 + }, + { + "epoch": 0.33, + "grad_norm": 1.5100860595703125, + "learning_rate": 0.00019814219669338928, + "loss": 1.6054, + "step": 21 + }, + { + "epoch": 0.35, + "grad_norm": 1.1800858974456787, + "learning_rate": 0.0001977903663510889, + "loss": 1.4081, + "step": 22 + }, + { + "epoch": 0.36, + "grad_norm": 1.3764004707336426, + "learning_rate": 0.00019740841436914917, + "loss": 1.5684, + "step": 23 + }, + { + "epoch": 0.38, + "grad_norm": 1.5436979532241821, + "learning_rate": 0.00019699645839739985, + "loss": 1.5433, + "step": 24 + }, + { + "epoch": 0.39, + "grad_norm": 1.2957199811935425, + "learning_rate": 0.00019655462532757676, + "loss": 1.3984, + "step": 25 + }, + { + "epoch": 0.41, + "grad_norm": 1.5687752962112427, + "learning_rate": 0.00019608305125423607, + "loss": 1.4136, + "step": 26 + }, + { + "epoch": 0.43, + "grad_norm": 2.148899793624878, + "learning_rate": 0.00019558188143283426, + "loss": 1.7407, + "step": 27 + }, + { + "epoch": 0.44, + "grad_norm": 1.304473638534546, + "learning_rate": 0.000195051270234986, + "loss": 1.3542, + "step": 28 + }, + { + "epoch": 0.46, + "grad_norm": 1.2896654605865479, + "learning_rate": 0.00019449138110091445, + "loss": 1.317, + "step": 29 + }, + { + "epoch": 0.47, + "grad_norm": 1.1432321071624756, + "learning_rate": 0.00019390238648910765, + "loss": 1.3113, + "step": 30 + }, + { + "epoch": 0.49, + "grad_norm": 1.5278006792068481, + "learning_rate": 0.00019328446782319772, + "loss": 1.4822, + "step": 31 + }, + { + "epoch": 0.51, + "grad_norm": 1.0685549974441528, + "learning_rate": 0.00019263781543607815, + "loss": 1.3071, + "step": 32 + }, + { + "epoch": 0.51, + "eval_loss": 1.316111445426941, + "eval_runtime": 5.1846, + "eval_samples_per_second": 7.715, + "eval_steps_per_second": 1.929, + "step": 32 + }, + { + "epoch": 0.52, + "grad_norm": 1.0776275396347046, + "learning_rate": 0.00019196262851127693, + "loss": 1.2977, + "step": 33 + }, + { + "epoch": 0.54, + "grad_norm": 1.4236021041870117, + "learning_rate": 0.00019125911502160364, + "loss": 1.3431, + "step": 34 + }, + { + "epoch": 0.55, + "grad_norm": 1.2043373584747314, + "learning_rate": 0.00019052749166508908, + "loss": 1.1973, + "step": 35 + }, + { + "epoch": 0.57, + "grad_norm": 1.4314641952514648, + "learning_rate": 0.0001897679837982373, + "loss": 1.2738, + "step": 36 + }, + { + "epoch": 0.58, + "grad_norm": 1.1113038063049316, + "learning_rate": 0.00018898082536661094, + "loss": 1.1974, + "step": 37 + }, + { + "epoch": 0.6, + "grad_norm": 1.3298149108886719, + "learning_rate": 0.00018816625883277043, + "loss": 1.2274, + "step": 38 + }, + { + "epoch": 0.62, + "grad_norm": 1.2348356246948242, + "learning_rate": 0.00018732453510159026, + "loss": 1.2461, + "step": 39 + }, + { + "epoch": 0.63, + "grad_norm": 1.0232346057891846, + "learning_rate": 0.0001864559134429745, + "loss": 1.166, + "step": 40 + }, + { + "epoch": 0.65, + "grad_norm": 1.1535675525665283, + "learning_rate": 0.000185560661411996, + "loss": 1.1162, + "step": 41 + }, + { + "epoch": 0.66, + "grad_norm": 1.2263463735580444, + "learning_rate": 0.00018463905476648307, + "loss": 1.1999, + "step": 42 + }, + { + "epoch": 0.68, + "grad_norm": 1.36984121799469, + "learning_rate": 0.0001836913773820802, + 
"loss": 1.2676, + "step": 43 + }, + { + "epoch": 0.69, + "grad_norm": 1.1568756103515625, + "learning_rate": 0.00018271792116480765, + "loss": 1.2012, + "step": 44 + }, + { + "epoch": 0.71, + "grad_norm": 1.0802751779556274, + "learning_rate": 0.00018171898596114805, + "loss": 1.1696, + "step": 45 + }, + { + "epoch": 0.73, + "grad_norm": 1.0523571968078613, + "learning_rate": 0.00018069487946568673, + "loss": 1.1398, + "step": 46 + }, + { + "epoch": 0.74, + "grad_norm": 1.1900922060012817, + "learning_rate": 0.00017964591712633498, + "loss": 1.2722, + "step": 47 + }, + { + "epoch": 0.76, + "grad_norm": 1.5743844509124756, + "learning_rate": 0.00017857242204716495, + "loss": 1.2833, + "step": 48 + }, + { + "epoch": 0.76, + "eval_loss": 1.1948559284210205, + "eval_runtime": 5.2088, + "eval_samples_per_second": 7.679, + "eval_steps_per_second": 1.92, + "step": 48 + }, + { + "epoch": 0.77, + "grad_norm": 1.3471274375915527, + "learning_rate": 0.00017747472488888622, + "loss": 1.2784, + "step": 49 + }, + { + "epoch": 0.79, + "grad_norm": 1.9713571071624756, + "learning_rate": 0.0001763531637669949, + "loss": 1.5447, + "step": 50 + }, + { + "epoch": 0.81, + "grad_norm": 1.1678998470306396, + "learning_rate": 0.00017520808414762641, + "loss": 1.1509, + "step": 51 + }, + { + "epoch": 0.82, + "grad_norm": 1.033021330833435, + "learning_rate": 0.0001740398387411442, + "loss": 1.0539, + "step": 52 + }, + { + "epoch": 0.84, + "grad_norm": 1.1294852495193481, + "learning_rate": 0.0001728487873934969, + "loss": 1.1355, + "step": 53 + }, + { + "epoch": 0.85, + "grad_norm": 1.2494419813156128, + "learning_rate": 0.00017163529697537754, + "loss": 1.1848, + "step": 54 + }, + { + "epoch": 0.87, + "grad_norm": 2.5106189250946045, + "learning_rate": 0.00017039974126921946, + "loss": 1.1048, + "step": 55 + }, + { + "epoch": 0.88, + "grad_norm": 0.9684445858001709, + "learning_rate": 0.0001691425008540625, + "loss": 1.0889, + "step": 56 + }, + { + "epoch": 0.9, + "grad_norm": 1.2388131618499756, + "learning_rate": 0.00016786396298832624, + "loss": 1.1113, + "step": 57 + }, + { + "epoch": 0.92, + "grad_norm": 1.2487854957580566, + "learning_rate": 0.0001665645214905257, + "loss": 1.1717, + "step": 58 + }, + { + "epoch": 0.93, + "grad_norm": 1.1480296850204468, + "learning_rate": 0.00016524457661796626, + "loss": 1.0576, + "step": 59 + }, + { + "epoch": 0.95, + "grad_norm": 1.1685549020767212, + "learning_rate": 0.0001639045349434554, + "loss": 1.1172, + "step": 60 + }, + { + "epoch": 0.96, + "grad_norm": 1.1080901622772217, + "learning_rate": 0.00016254480923006925, + "loss": 1.0975, + "step": 61 + }, + { + "epoch": 0.98, + "grad_norm": 1.9709242582321167, + "learning_rate": 0.00016116581830401193, + "loss": 1.1182, + "step": 62 + }, + { + "epoch": 0.99, + "grad_norm": 1.3067880868911743, + "learning_rate": 0.00015976798692560795, + "loss": 1.0966, + "step": 63 + }, + { + "epoch": 1.01, + "grad_norm": 1.0896201133728027, + "learning_rate": 0.00015835174565846622, + "loss": 1.1168, + "step": 64 + }, + { + "epoch": 1.01, + "eval_loss": 1.1173784732818604, + "eval_runtime": 5.2256, + "eval_samples_per_second": 7.655, + "eval_steps_per_second": 1.914, + "step": 64 + }, + { + "epoch": 1.03, + "grad_norm": 1.3303048610687256, + "learning_rate": 0.00015691753073685693, + "loss": 1.1814, + "step": 65 + }, + { + "epoch": 1.04, + "grad_norm": 1.2705278396606445, + "learning_rate": 0.0001554657839313413, + "loss": 1.0399, + "step": 66 + }, + { + "epoch": 1.06, + "grad_norm": 1.3033435344696045, + "learning_rate": 
0.00015399695241269666, + "loss": 1.0667, + "step": 67 + }, + { + "epoch": 1.07, + "grad_norm": 1.1935135126113892, + "learning_rate": 0.00015251148861417733, + "loss": 1.0481, + "step": 68 + }, + { + "epoch": 1.09, + "grad_norm": 1.202915906906128, + "learning_rate": 0.0001510098500921552, + "loss": 1.0197, + "step": 69 + }, + { + "epoch": 1.11, + "grad_norm": 1.1750216484069824, + "learning_rate": 0.000149492499385182, + "loss": 1.0542, + "step": 70 + }, + { + "epoch": 1.12, + "grad_norm": 1.2564030885696411, + "learning_rate": 0.00014795990387151718, + "loss": 1.105, + "step": 71 + }, + { + "epoch": 1.14, + "grad_norm": 1.5712127685546875, + "learning_rate": 0.0001464125356251644, + "loss": 1.1838, + "step": 72 + }, + { + "epoch": 1.15, + "grad_norm": 1.232422947883606, + "learning_rate": 0.00014485087127046254, + "loss": 1.0357, + "step": 73 + }, + { + "epoch": 1.17, + "grad_norm": 1.5472371578216553, + "learning_rate": 0.00014327539183527446, + "loss": 1.1008, + "step": 74 + }, + { + "epoch": 1.18, + "grad_norm": 1.2168437242507935, + "learning_rate": 0.00014168658260281945, + "loss": 0.999, + "step": 75 + }, + { + "epoch": 1.2, + "grad_norm": 1.2842482328414917, + "learning_rate": 0.0001400849329621953, + "loss": 1.0174, + "step": 76 + }, + { + "epoch": 1.22, + "grad_norm": 1.3613685369491577, + "learning_rate": 0.00013847093625763516, + "loss": 1.0749, + "step": 77 + }, + { + "epoch": 1.23, + "grad_norm": 1.313064694404602, + "learning_rate": 0.0001368450896365467, + "loss": 1.0647, + "step": 78 + }, + { + "epoch": 1.25, + "grad_norm": 1.2898201942443848, + "learning_rate": 0.000135207893896379, + "loss": 0.9813, + "step": 79 + }, + { + "epoch": 1.26, + "grad_norm": 1.3023165464401245, + "learning_rate": 0.0001335598533303662, + "loss": 1.0158, + "step": 80 + }, + { + "epoch": 1.26, + "eval_loss": 1.0760670900344849, + "eval_runtime": 5.2422, + "eval_samples_per_second": 7.63, + "eval_steps_per_second": 1.908, + "step": 80 + }, + { + "epoch": 1.28, + "grad_norm": 1.3555819988250732, + "learning_rate": 0.00013190147557219338, + "loss": 1.1006, + "step": 81 + }, + { + "epoch": 1.29, + "grad_norm": 1.4895110130310059, + "learning_rate": 0.00013023327143963414, + "loss": 1.0924, + "step": 82 + }, + { + "epoch": 1.31, + "grad_norm": 1.3837958574295044, + "learning_rate": 0.0001285557547772072, + "loss": 1.0098, + "step": 83 + }, + { + "epoch": 1.33, + "grad_norm": 1.3755472898483276, + "learning_rate": 0.00012686944229790042, + "loss": 1.0196, + "step": 84 + }, + { + "epoch": 1.34, + "grad_norm": 1.115247368812561, + "learning_rate": 0.00012517485342401202, + "loss": 0.9746, + "step": 85 + }, + { + "epoch": 1.36, + "grad_norm": 1.2877663373947144, + "learning_rate": 0.00012347251012715627, + "loss": 1.0134, + "step": 86 + }, + { + "epoch": 1.37, + "grad_norm": 1.182273507118225, + "learning_rate": 0.00012176293676748493, + "loss": 1.0437, + "step": 87 + }, + { + "epoch": 1.39, + "grad_norm": 1.2405059337615967, + "learning_rate": 0.0001200466599321721, + "loss": 0.9615, + "step": 88 + }, + { + "epoch": 1.41, + "grad_norm": 1.3767961263656616, + "learning_rate": 0.00011832420827321373, + "loss": 1.0392, + "step": 89 + }, + { + "epoch": 1.42, + "grad_norm": 1.2421908378601074, + "learning_rate": 0.0001165961123445908, + "loss": 1.0476, + "step": 90 + }, + { + "epoch": 1.44, + "grad_norm": 1.2321393489837646, + "learning_rate": 0.00011486290443884666, + "loss": 0.9779, + "step": 91 + }, + { + "epoch": 1.45, + "grad_norm": 1.3787308931350708, + "learning_rate": 0.00011312511842312909, + 
"loss": 0.989, + "step": 92 + }, + { + "epoch": 1.47, + "grad_norm": 1.233562707901001, + "learning_rate": 0.00011138328957474691, + "loss": 0.9887, + "step": 93 + }, + { + "epoch": 1.48, + "grad_norm": 1.2139899730682373, + "learning_rate": 0.00010963795441629274, + "loss": 0.9525, + "step": 94 + }, + { + "epoch": 1.5, + "grad_norm": 1.5555188655853271, + "learning_rate": 0.00010788965055038178, + "loss": 1.0339, + "step": 95 + }, + { + "epoch": 1.52, + "grad_norm": 1.7850981950759888, + "learning_rate": 0.00010613891649405815, + "loss": 1.1342, + "step": 96 + }, + { + "epoch": 1.52, + "eval_loss": 1.0439285039901733, + "eval_runtime": 5.2194, + "eval_samples_per_second": 7.664, + "eval_steps_per_second": 1.916, + "step": 96 + }, + { + "epoch": 1.53, + "grad_norm": 1.3491744995117188, + "learning_rate": 0.00010438629151291943, + "loss": 1.0061, + "step": 97 + }, + { + "epoch": 1.55, + "grad_norm": 1.359479308128357, + "learning_rate": 0.00010263231545501068, + "loss": 0.9811, + "step": 98 + }, + { + "epoch": 1.56, + "grad_norm": 1.1872361898422241, + "learning_rate": 0.00010087752858453923, + "loss": 0.9766, + "step": 99 + }, + { + "epoch": 1.58, + "grad_norm": 1.2711139917373657, + "learning_rate": 9.912247141546079e-05, + "loss": 1.0206, + "step": 100 + }, + { + "epoch": 1.59, + "grad_norm": 1.5995763540267944, + "learning_rate": 9.736768454498936e-05, + "loss": 1.0461, + "step": 101 + }, + { + "epoch": 1.61, + "grad_norm": 2.560168743133545, + "learning_rate": 9.56137084870806e-05, + "loss": 1.1224, + "step": 102 + }, + { + "epoch": 1.63, + "grad_norm": 1.3833796977996826, + "learning_rate": 9.38610835059419e-05, + "loss": 1.063, + "step": 103 + }, + { + "epoch": 1.64, + "grad_norm": 1.840963363647461, + "learning_rate": 9.211034944961823e-05, + "loss": 1.1159, + "step": 104 + }, + { + "epoch": 1.66, + "grad_norm": 1.2169251441955566, + "learning_rate": 9.036204558370725e-05, + "loss": 0.9595, + "step": 105 + }, + { + "epoch": 1.67, + "grad_norm": 1.2543871402740479, + "learning_rate": 8.861671042525311e-05, + "loss": 1.0113, + "step": 106 + }, + { + "epoch": 1.69, + "grad_norm": 1.2910866737365723, + "learning_rate": 8.68748815768709e-05, + "loss": 0.9819, + "step": 107 + }, + { + "epoch": 1.71, + "grad_norm": 1.3683500289916992, + "learning_rate": 8.513709556115335e-05, + "loss": 0.9015, + "step": 108 + }, + { + "epoch": 1.72, + "grad_norm": 1.5607995986938477, + "learning_rate": 8.340388765540923e-05, + "loss": 1.0256, + "step": 109 + }, + { + "epoch": 1.74, + "grad_norm": 1.6682592630386353, + "learning_rate": 8.16757917267863e-05, + "loss": 1.0977, + "step": 110 + }, + { + "epoch": 1.75, + "grad_norm": 1.2137242555618286, + "learning_rate": 7.995334006782793e-05, + "loss": 0.9663, + "step": 111 + }, + { + "epoch": 1.77, + "grad_norm": 1.2425730228424072, + "learning_rate": 7.823706323251512e-05, + "loss": 0.955, + "step": 112 + }, + { + "epoch": 1.77, + "eval_loss": 1.0228078365325928, + "eval_runtime": 5.2561, + "eval_samples_per_second": 7.61, + "eval_steps_per_second": 1.903, + "step": 112 + }, + { + "epoch": 1.78, + "grad_norm": 1.415521502494812, + "learning_rate": 7.652748987284375e-05, + "loss": 1.037, + "step": 113 + }, + { + "epoch": 1.8, + "grad_norm": 1.2909739017486572, + "learning_rate": 7.482514657598799e-05, + "loss": 0.8776, + "step": 114 + }, + { + "epoch": 1.82, + "grad_norm": 1.7562286853790283, + "learning_rate": 7.31305577020996e-05, + "loss": 0.9811, + "step": 115 + }, + { + "epoch": 1.83, + "grad_norm": 1.5445969104766846, + "learning_rate": 
7.144424522279283e-05, + "loss": 0.9487, + "step": 116 + }, + { + "epoch": 1.85, + "grad_norm": 1.475395679473877, + "learning_rate": 6.976672856036585e-05, + "loss": 1.0024, + "step": 117 + }, + { + "epoch": 1.86, + "grad_norm": 1.3962652683258057, + "learning_rate": 6.809852442780664e-05, + "loss": 0.9585, + "step": 118 + }, + { + "epoch": 1.88, + "grad_norm": 1.430710792541504, + "learning_rate": 6.644014666963386e-05, + "loss": 1.0057, + "step": 119 + }, + { + "epoch": 1.89, + "grad_norm": 1.3526166677474976, + "learning_rate": 6.479210610362103e-05, + "loss": 0.9973, + "step": 120 + }, + { + "epoch": 1.91, + "grad_norm": 1.506891131401062, + "learning_rate": 6.315491036345338e-05, + "loss": 0.9622, + "step": 121 + }, + { + "epoch": 1.93, + "grad_norm": 1.2012327909469604, + "learning_rate": 6.152906374236485e-05, + "loss": 0.9271, + "step": 122 + }, + { + "epoch": 1.94, + "grad_norm": 1.6816527843475342, + "learning_rate": 5.991506703780475e-05, + "loss": 1.0185, + "step": 123 + }, + { + "epoch": 1.96, + "grad_norm": 1.5374231338500977, + "learning_rate": 5.831341739718055e-05, + "loss": 0.9864, + "step": 124 + }, + { + "epoch": 1.97, + "grad_norm": 1.3972766399383545, + "learning_rate": 5.672460816472556e-05, + "loss": 0.9894, + "step": 125 + }, + { + "epoch": 1.99, + "grad_norm": 1.3701297044754028, + "learning_rate": 5.5149128729537457e-05, + "loss": 0.9733, + "step": 126 + }, + { + "epoch": 2.01, + "grad_norm": 1.5475809574127197, + "learning_rate": 5.3587464374835596e-05, + "loss": 1.0167, + "step": 127 + }, + { + "epoch": 2.02, + "grad_norm": 1.5290844440460205, + "learning_rate": 5.2040096128482876e-05, + "loss": 1.0033, + "step": 128 + }, + { + "epoch": 2.02, + "eval_loss": 1.008642315864563, + "eval_runtime": 5.2029, + "eval_samples_per_second": 7.688, + "eval_steps_per_second": 1.922, + "step": 128 + }, + { + "epoch": 2.04, + "grad_norm": 1.4941701889038086, + "learning_rate": 5.0507500614817995e-05, + "loss": 0.892, + "step": 129 + }, + { + "epoch": 2.05, + "grad_norm": 3.3490726947784424, + "learning_rate": 4.899014990784485e-05, + "loss": 0.8973, + "step": 130 + }, + { + "epoch": 2.07, + "grad_norm": 1.2334100008010864, + "learning_rate": 4.748851138582269e-05, + "loss": 0.9412, + "step": 131 + }, + { + "epoch": 2.08, + "grad_norm": 1.171141266822815, + "learning_rate": 4.6003047587303374e-05, + "loss": 0.8716, + "step": 132 + }, + { + "epoch": 2.1, + "grad_norm": 1.490692377090454, + "learning_rate": 4.453421606865868e-05, + "loss": 0.8921, + "step": 133 + }, + { + "epoch": 2.12, + "grad_norm": 1.7814823389053345, + "learning_rate": 4.3082469263143065e-05, + "loss": 0.9089, + "step": 134 + }, + { + "epoch": 2.13, + "grad_norm": 1.602158784866333, + "learning_rate": 4.164825434153381e-05, + "loss": 0.9109, + "step": 135 + }, + { + "epoch": 2.15, + "grad_norm": 1.4815092086791992, + "learning_rate": 4.0232013074392064e-05, + "loss": 0.9481, + "step": 136 + }, + { + "epoch": 2.16, + "grad_norm": 1.422029972076416, + "learning_rate": 3.8834181695988084e-05, + "loss": 0.8452, + "step": 137 + }, + { + "epoch": 2.18, + "grad_norm": 1.2132339477539062, + "learning_rate": 3.745519076993078e-05, + "loss": 0.9289, + "step": 138 + }, + { + "epoch": 2.19, + "grad_norm": 1.4218840599060059, + "learning_rate": 3.609546505654462e-05, + "loss": 0.9368, + "step": 139 + }, + { + "epoch": 2.21, + "grad_norm": 1.5280232429504395, + "learning_rate": 3.475542338203377e-05, + "loss": 0.9078, + "step": 140 + }, + { + "epoch": 2.23, + "grad_norm": 1.5697394609451294, + "learning_rate": 
3.343547850947434e-05, + "loss": 0.9264, + "step": 141 + }, + { + "epoch": 2.24, + "grad_norm": 1.58542001247406, + "learning_rate": 3.21360370116738e-05, + "loss": 0.8733, + "step": 142 + }, + { + "epoch": 2.26, + "grad_norm": 1.6135077476501465, + "learning_rate": 3.085749914593752e-05, + "loss": 0.9952, + "step": 143 + }, + { + "epoch": 2.27, + "grad_norm": 1.562941551208496, + "learning_rate": 2.9600258730780562e-05, + "loss": 1.0169, + "step": 144 + }, + { + "epoch": 2.27, + "eval_loss": 1.003143072128296, + "eval_runtime": 5.2316, + "eval_samples_per_second": 7.646, + "eval_steps_per_second": 1.911, + "step": 144 + }, + { + "epoch": 2.29, + "grad_norm": 1.5057990550994873, + "learning_rate": 2.8364703024622473e-05, + "loss": 0.9324, + "step": 145 + }, + { + "epoch": 2.31, + "grad_norm": 1.617148518562317, + "learning_rate": 2.715121260650316e-05, + "loss": 0.9765, + "step": 146 + }, + { + "epoch": 2.32, + "grad_norm": 1.8736790418624878, + "learning_rate": 2.596016125885581e-05, + "loss": 0.9421, + "step": 147 + }, + { + "epoch": 2.34, + "grad_norm": 1.489425778388977, + "learning_rate": 2.4791915852373605e-05, + "loss": 0.9899, + "step": 148 + }, + { + "epoch": 2.35, + "grad_norm": 2.661579132080078, + "learning_rate": 2.364683623300513e-05, + "loss": 1.21, + "step": 149 + }, + { + "epoch": 2.37, + "grad_norm": 1.7774057388305664, + "learning_rate": 2.2525275111113807e-05, + "loss": 0.9954, + "step": 150 + }, + { + "epoch": 2.38, + "grad_norm": 1.4010025262832642, + "learning_rate": 2.142757795283504e-05, + "loss": 0.9268, + "step": 151 + }, + { + "epoch": 2.4, + "grad_norm": 1.3390811681747437, + "learning_rate": 2.0354082873665015e-05, + "loss": 0.8932, + "step": 152 + }, + { + "epoch": 2.42, + "grad_norm": 1.398987054824829, + "learning_rate": 1.9305120534313293e-05, + "loss": 0.9264, + "step": 153 + }, + { + "epoch": 2.43, + "grad_norm": 1.4676743745803833, + "learning_rate": 1.8281014038851963e-05, + "loss": 0.9024, + "step": 154 + }, + { + "epoch": 2.45, + "grad_norm": 1.6303927898406982, + "learning_rate": 1.728207883519236e-05, + "loss": 1.0011, + "step": 155 + }, + { + "epoch": 2.46, + "grad_norm": 1.4315526485443115, + "learning_rate": 1.630862261791982e-05, + "loss": 0.9404, + "step": 156 + }, + { + "epoch": 2.48, + "grad_norm": 1.7993364334106445, + "learning_rate": 1.536094523351693e-05, + "loss": 0.9953, + "step": 157 + }, + { + "epoch": 2.49, + "grad_norm": 1.37579345703125, + "learning_rate": 1.4439338588004004e-05, + "loss": 0.8594, + "step": 158 + }, + { + "epoch": 2.51, + "grad_norm": 1.5228403806686401, + "learning_rate": 1.3544086557025493e-05, + "loss": 0.927, + "step": 159 + }, + { + "epoch": 2.53, + "grad_norm": 1.5382976531982422, + "learning_rate": 1.2675464898409772e-05, + "loss": 0.9127, + "step": 160 + }, + { + "epoch": 2.53, + "eval_loss": 0.9940541982650757, + "eval_runtime": 5.2375, + "eval_samples_per_second": 7.637, + "eval_steps_per_second": 1.909, + "step": 160 + }, + { + "epoch": 2.54, + "grad_norm": 1.4625706672668457, + "learning_rate": 1.1833741167229585e-05, + "loss": 0.9324, + "step": 161 + }, + { + "epoch": 2.56, + "grad_norm": 1.4920152425765991, + "learning_rate": 1.1019174633389073e-05, + "loss": 0.9125, + "step": 162 + }, + { + "epoch": 2.57, + "grad_norm": 1.3073935508728027, + "learning_rate": 1.0232016201762696e-05, + "loss": 0.9382, + "step": 163 + }, + { + "epoch": 2.59, + "grad_norm": 1.5801061391830444, + "learning_rate": 9.472508334910945e-06, + "loss": 0.9564, + "step": 164 + }, + { + "epoch": 2.61, + "grad_norm": 
1.5793989896774292, + "learning_rate": 8.740884978396357e-06, + "loss": 0.9087, + "step": 165 + }, + { + "epoch": 2.62, + "grad_norm": 1.3114168643951416, + "learning_rate": 8.037371488723078e-06, + "loss": 0.8847, + "step": 166 + }, + { + "epoch": 2.64, + "grad_norm": 1.423766016960144, + "learning_rate": 7.3621845639218704e-06, + "loss": 0.8316, + "step": 167 + }, + { + "epoch": 2.65, + "grad_norm": 1.607203722000122, + "learning_rate": 6.715532176802297e-06, + "loss": 0.9283, + "step": 168 + }, + { + "epoch": 2.67, + "grad_norm": 1.460830569267273, + "learning_rate": 6.0976135108923636e-06, + "loss": 0.876, + "step": 169 + }, + { + "epoch": 2.68, + "grad_norm": 1.5523853302001953, + "learning_rate": 5.508618899085583e-06, + "loss": 0.837, + "step": 170 + }, + { + "epoch": 2.7, + "grad_norm": 1.6450238227844238, + "learning_rate": 4.948729765014004e-06, + "loss": 0.9314, + "step": 171 + }, + { + "epoch": 2.72, + "grad_norm": 1.5870205163955688, + "learning_rate": 4.418118567165763e-06, + "loss": 0.975, + "step": 172 + }, + { + "epoch": 2.73, + "grad_norm": 1.4294880628585815, + "learning_rate": 3.916948745763937e-06, + "loss": 0.8901, + "step": 173 + }, + { + "epoch": 2.75, + "grad_norm": 1.434590220451355, + "learning_rate": 3.4453746724232515e-06, + "loss": 0.9697, + "step": 174 + }, + { + "epoch": 2.76, + "grad_norm": 1.6419281959533691, + "learning_rate": 3.003541602600157e-06, + "loss": 0.9479, + "step": 175 + }, + { + "epoch": 2.78, + "grad_norm": 1.55467689037323, + "learning_rate": 2.5915856308508345e-06, + "loss": 0.93, + "step": 176 + }, + { + "epoch": 2.78, + "eval_loss": 0.9941838979721069, + "eval_runtime": 5.2266, + "eval_samples_per_second": 7.653, + "eval_steps_per_second": 1.913, + "step": 176 + }, + { + "epoch": 2.79, + "grad_norm": 1.4256466627120972, + "learning_rate": 2.2096336489111026e-06, + "loss": 0.9518, + "step": 177 + }, + { + "epoch": 2.81, + "grad_norm": 1.6299464702606201, + "learning_rate": 1.8578033066107393e-06, + "loss": 0.9864, + "step": 178 + }, + { + "epoch": 2.83, + "grad_norm": 1.3261350393295288, + "learning_rate": 1.5362029756348373e-06, + "loss": 0.8373, + "step": 179 + }, + { + "epoch": 2.84, + "grad_norm": 1.5447438955307007, + "learning_rate": 1.2449317161427943e-06, + "loss": 0.9151, + "step": 180 + }, + { + "epoch": 2.86, + "grad_norm": 2.195840358734131, + "learning_rate": 9.840792462555426e-07, + "loss": 0.9899, + "step": 181 + }, + { + "epoch": 2.87, + "grad_norm": 1.4806081056594849, + "learning_rate": 7.537259144203779e-07, + "loss": 1.017, + "step": 182 + }, + { + "epoch": 2.89, + "grad_norm": 1.3689913749694824, + "learning_rate": 5.539426746618336e-07, + "loss": 0.8656, + "step": 183 + }, + { + "epoch": 2.91, + "grad_norm": 1.4289988279342651, + "learning_rate": 3.847910647261754e-07, + "loss": 0.9297, + "step": 184 + }, + { + "epoch": 2.92, + "grad_norm": 1.7640506029129028, + "learning_rate": 2.463231871264626e-07, + "loss": 0.9178, + "step": 185 + }, + { + "epoch": 2.94, + "grad_norm": 1.3823715448379517, + "learning_rate": 1.3858169309376444e-07, + "loss": 0.9268, + "step": 186 + }, + { + "epoch": 2.95, + "grad_norm": 1.3464597463607788, + "learning_rate": 6.159976943959089e-08, + "loss": 0.8624, + "step": 187 + }, + { + "epoch": 2.97, + "grad_norm": 1.5800071954727173, + "learning_rate": 1.5401128333669688e-08, + "loss": 0.9216, + "step": 188 + }, + { + "epoch": 2.98, + "grad_norm": 1.7508342266082764, + "learning_rate": 0.0, + "loss": 1.0113, + "step": 189 + } + ], + "logging_steps": 1, + "max_steps": 189, + 
"num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 63, + "total_flos": 4.956581729717453e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-189/training_args.bin b/out/checkpoint-189/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47200fcd99a14764a486a432d79a2faffdf139d3 --- /dev/null +++ b/out/checkpoint-189/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c270bfcfa5d2bb7a14a890f8d6ed0bdf6d120bfca89bef54d9b15f8b78d675e4 +size 5688 diff --git a/out/checkpoint-230/README.md b/out/checkpoint-230/README.md new file mode 100644 index 0000000000000000000000000000000000000000..09b95f7bb1935f89566de0ae0601274fa95c48f2 --- /dev/null +++ b/out/checkpoint-230/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-Instruct-v0.2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.9.0 \ No newline at end of file diff --git a/out/checkpoint-230/adapter_config.json b/out/checkpoint-230/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b68237c5b29f907e3cc47e1a1c75ae55cd0de55 --- /dev/null +++ b/out/checkpoint-230/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/out/checkpoint-230/adapter_model.safetensors b/out/checkpoint-230/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e02df8f225781a0814c80200ee2a19e9888210a --- /dev/null +++ b/out/checkpoint-230/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ea5ef1d4afd72c81fc82043a925c7d20934338c766a6669cbd6a50eed11170 +size 13648432 diff --git a/out/checkpoint-230/optimizer.pt b/out/checkpoint-230/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e8e0bc49208bf15f9c77916355a6a91616c311a --- /dev/null +++ b/out/checkpoint-230/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c46fe8f27fbf136aea9c87cef2c7dcb8ae66690dadf3fac9e52f32a1312bd5 +size 6979130 diff --git a/out/checkpoint-230/rng_state_0.pth b/out/checkpoint-230/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..bb0196ced71d5766121c4773cb2e62bcc65b315c --- /dev/null +++ b/out/checkpoint-230/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8b0a99647b825886e1733b3af7f6342bbfb2b2170ac9b308fe1743bb971d98 +size 14512 diff --git a/out/checkpoint-230/rng_state_1.pth b/out/checkpoint-230/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..5df10d9ad51093421aa75383b7874f8503ad0f0d --- /dev/null +++ b/out/checkpoint-230/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6df5d0602fcac61681c85f79710377320f9fdd1d9fffc2b26d129d44e3423d +size 14512 diff --git a/out/checkpoint-230/scheduler.pt 
b/out/checkpoint-230/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..833f436c5bb3014059cbdeab37f54c89761a9265 --- /dev/null +++ b/out/checkpoint-230/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a94db31ecff2830d0c9dbd645009e4a63c2f9173e5428eac06cd129d112f9a92 +size 1064 diff --git a/out/checkpoint-230/special_tokens_map.json b/out/checkpoint-230/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/checkpoint-230/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-230/tokenizer.model b/out/checkpoint-230/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/checkpoint-230/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/checkpoint-230/tokenizer_config.json b/out/checkpoint-230/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/checkpoint-230/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/checkpoint-230/trainer_state.json b/out/checkpoint-230/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..187ba3d5c4e30032d1265c0c96a2de513bb0f34a --- /dev/null +++ b/out/checkpoint-230/trainer_state.json @@ -0,0 +1,1695 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9942196531791907, + "eval_steps": 29, + "global_step": 230, + "is_hyper_param_search": 
false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 4.158745288848877, + "learning_rate": 2e-05, + "loss": 3.2348, + "step": 1 + }, + { + "epoch": 0.01, + "eval_loss": 3.0255825519561768, + "eval_runtime": 10.0801, + "eval_samples_per_second": 7.242, + "eval_steps_per_second": 1.885, + "step": 1 + }, + { + "epoch": 0.02, + "grad_norm": 3.48564076423645, + "learning_rate": 4e-05, + "loss": 3.0591, + "step": 2 + }, + { + "epoch": 0.03, + "grad_norm": 4.051155090332031, + "learning_rate": 6e-05, + "loss": 3.345, + "step": 3 + }, + { + "epoch": 0.03, + "grad_norm": 2.94571852684021, + "learning_rate": 8e-05, + "loss": 2.9206, + "step": 4 + }, + { + "epoch": 0.04, + "grad_norm": 3.961529493331909, + "learning_rate": 0.0001, + "loss": 3.0326, + "step": 5 + }, + { + "epoch": 0.05, + "grad_norm": 2.709989547729492, + "learning_rate": 0.00012, + "loss": 2.6911, + "step": 6 + }, + { + "epoch": 0.06, + "grad_norm": 2.7331080436706543, + "learning_rate": 0.00014, + "loss": 2.5443, + "step": 7 + }, + { + "epoch": 0.07, + "grad_norm": 3.204464912414551, + "learning_rate": 0.00016, + "loss": 2.5651, + "step": 8 + }, + { + "epoch": 0.08, + "grad_norm": 2.8977890014648438, + "learning_rate": 0.00018, + "loss": 2.5079, + "step": 9 + }, + { + "epoch": 0.09, + "grad_norm": 2.810033082962036, + "learning_rate": 0.0002, + "loss": 2.2729, + "step": 10 + }, + { + "epoch": 0.1, + "grad_norm": 2.0960497856140137, + "learning_rate": 0.00019999560279257314, + "loss": 2.2163, + "step": 11 + }, + { + "epoch": 0.1, + "grad_norm": 2.17856502532959, + "learning_rate": 0.0001999824115570012, + "loss": 2.1658, + "step": 12 + }, + { + "epoch": 0.11, + "grad_norm": 1.9092390537261963, + "learning_rate": 0.00019996042745337617, + "loss": 2.0652, + "step": 13 + }, + { + "epoch": 0.12, + "grad_norm": 2.242626905441284, + "learning_rate": 0.0001999296524150713, + "loss": 1.9754, + "step": 14 + }, + { + "epoch": 0.13, + "grad_norm": 2.0627095699310303, + "learning_rate": 0.00019989008914857116, + "loss": 1.9169, + "step": 15 + }, + { + "epoch": 0.14, + "grad_norm": 2.00533390045166, + "learning_rate": 0.00019984174113323353, + "loss": 1.756, + "step": 16 + }, + { + "epoch": 0.15, + "grad_norm": 1.5564405918121338, + "learning_rate": 0.00019978461262098343, + "loss": 1.8304, + "step": 17 + }, + { + "epoch": 0.16, + "grad_norm": 2.127427339553833, + "learning_rate": 0.00019971870863593925, + "loss": 1.9093, + "step": 18 + }, + { + "epoch": 0.16, + "grad_norm": 1.4882323741912842, + "learning_rate": 0.00019964403497397084, + "loss": 1.7579, + "step": 19 + }, + { + "epoch": 0.17, + "grad_norm": 1.4289195537567139, + "learning_rate": 0.00019956059820218982, + "loss": 1.5739, + "step": 20 + }, + { + "epoch": 0.18, + "grad_norm": 1.2495356798171997, + "learning_rate": 0.00019946840565837203, + "loss": 1.5548, + "step": 21 + }, + { + "epoch": 0.19, + "grad_norm": 1.476159691810608, + "learning_rate": 0.00019936746545031223, + "loss": 1.5918, + "step": 22 + }, + { + "epoch": 0.2, + "grad_norm": 1.399651050567627, + "learning_rate": 0.0001992577864551111, + "loss": 1.5577, + "step": 23 + }, + { + "epoch": 0.21, + "grad_norm": 1.3828766345977783, + "learning_rate": 0.0001991393783183945, + "loss": 1.5168, + "step": 24 + }, + { + "epoch": 0.22, + "grad_norm": 1.4390034675598145, + "learning_rate": 0.0001990122514534651, + "loss": 1.511, + "step": 25 + }, + { + "epoch": 0.23, + "grad_norm": 1.481458306312561, + "learning_rate": 0.00019887641704038688, + "loss": 1.4448, 
+ "step": 26 + }, + { + "epoch": 0.23, + "grad_norm": 1.4939665794372559, + "learning_rate": 0.00019873188702500163, + "loss": 1.4146, + "step": 27 + }, + { + "epoch": 0.24, + "grad_norm": 1.5099453926086426, + "learning_rate": 0.00019857867411787847, + "loss": 1.4192, + "step": 28 + }, + { + "epoch": 0.25, + "grad_norm": 1.4652291536331177, + "learning_rate": 0.00019841679179319606, + "loss": 1.2936, + "step": 29 + }, + { + "epoch": 0.25, + "eval_loss": 1.2550688982009888, + "eval_runtime": 10.1422, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 1.873, + "step": 29 + }, + { + "epoch": 0.26, + "grad_norm": 1.5150835514068604, + "learning_rate": 0.0001982462542875576, + "loss": 1.3272, + "step": 30 + }, + { + "epoch": 0.27, + "grad_norm": 1.4407225847244263, + "learning_rate": 0.00019806707659873887, + "loss": 1.1821, + "step": 31 + }, + { + "epoch": 0.28, + "grad_norm": 1.6642241477966309, + "learning_rate": 0.0001978792744843691, + "loss": 1.391, + "step": 32 + }, + { + "epoch": 0.29, + "grad_norm": 1.5081162452697754, + "learning_rate": 0.00019768286446054532, + "loss": 1.296, + "step": 33 + }, + { + "epoch": 0.29, + "grad_norm": 1.2953822612762451, + "learning_rate": 0.0001974778638003799, + "loss": 1.1308, + "step": 34 + }, + { + "epoch": 0.3, + "grad_norm": 1.574062466621399, + "learning_rate": 0.0001972642905324813, + "loss": 1.2351, + "step": 35 + }, + { + "epoch": 0.31, + "grad_norm": 1.355309247970581, + "learning_rate": 0.00019704216343936873, + "loss": 1.0967, + "step": 36 + }, + { + "epoch": 0.32, + "grad_norm": 1.5858274698257446, + "learning_rate": 0.00019681150205582025, + "loss": 1.1853, + "step": 37 + }, + { + "epoch": 0.33, + "grad_norm": 1.1243816614151, + "learning_rate": 0.00019657232666715486, + "loss": 1.0613, + "step": 38 + }, + { + "epoch": 0.34, + "grad_norm": 1.0661962032318115, + "learning_rate": 0.00019632465830744846, + "loss": 1.0101, + "step": 39 + }, + { + "epoch": 0.35, + "grad_norm": 1.246745228767395, + "learning_rate": 0.000196068518757684, + "loss": 1.0325, + "step": 40 + }, + { + "epoch": 0.36, + "grad_norm": 1.2067025899887085, + "learning_rate": 0.00019580393054383622, + "loss": 0.9933, + "step": 41 + }, + { + "epoch": 0.36, + "grad_norm": 1.4754862785339355, + "learning_rate": 0.00019553091693489018, + "loss": 1.1773, + "step": 42 + }, + { + "epoch": 0.37, + "grad_norm": 1.314193844795227, + "learning_rate": 0.00019524950194079534, + "loss": 0.9823, + "step": 43 + }, + { + "epoch": 0.38, + "grad_norm": 1.4316176176071167, + "learning_rate": 0.00019495971031035367, + "loss": 0.9911, + "step": 44 + }, + { + "epoch": 0.39, + "grad_norm": 1.5742014646530151, + "learning_rate": 0.00019466156752904343, + "loss": 0.949, + "step": 45 + }, + { + "epoch": 0.4, + "grad_norm": 1.5437729358673096, + "learning_rate": 0.00019435509981677762, + "loss": 0.9165, + "step": 46 + }, + { + "epoch": 0.41, + "grad_norm": 1.656130075454712, + "learning_rate": 0.00019404033412559826, + "loss": 0.8911, + "step": 47 + }, + { + "epoch": 0.42, + "grad_norm": 1.6358022689819336, + "learning_rate": 0.00019371729813730606, + "loss": 0.957, + "step": 48 + }, + { + "epoch": 0.42, + "grad_norm": 1.4374278783798218, + "learning_rate": 0.00019338602026102594, + "loss": 0.8144, + "step": 49 + }, + { + "epoch": 0.43, + "grad_norm": 1.449952244758606, + "learning_rate": 0.0001930465296307087, + "loss": 0.9469, + "step": 50 + }, + { + "epoch": 0.44, + "grad_norm": 1.4793896675109863, + "learning_rate": 0.00019269885610256865, + "loss": 0.8433, + "step": 51 + }, + { + 
"epoch": 0.45, + "grad_norm": 1.6898478269577026, + "learning_rate": 0.00019234303025245835, + "loss": 0.7861, + "step": 52 + }, + { + "epoch": 0.46, + "grad_norm": 1.6187032461166382, + "learning_rate": 0.0001919790833731791, + "loss": 1.0128, + "step": 53 + }, + { + "epoch": 0.47, + "grad_norm": 1.6767897605895996, + "learning_rate": 0.00019160704747172934, + "loss": 0.9005, + "step": 54 + }, + { + "epoch": 0.48, + "grad_norm": 1.5994288921356201, + "learning_rate": 0.00019122695526648968, + "loss": 0.8365, + "step": 55 + }, + { + "epoch": 0.49, + "grad_norm": 2.3374545574188232, + "learning_rate": 0.00019083884018434547, + "loss": 0.753, + "step": 56 + }, + { + "epoch": 0.49, + "grad_norm": 2.1330294609069824, + "learning_rate": 0.00019044273635774705, + "loss": 0.704, + "step": 57 + }, + { + "epoch": 0.5, + "grad_norm": 2.139307737350464, + "learning_rate": 0.00019003867862170832, + "loss": 0.9394, + "step": 58 + }, + { + "epoch": 0.5, + "eval_loss": 0.7861204147338867, + "eval_runtime": 10.1594, + "eval_samples_per_second": 7.185, + "eval_steps_per_second": 1.87, + "step": 58 + }, + { + "epoch": 0.51, + "grad_norm": 1.8726176023483276, + "learning_rate": 0.00018962670251074275, + "loss": 0.8582, + "step": 59 + }, + { + "epoch": 0.52, + "grad_norm": 2.6511785984039307, + "learning_rate": 0.00018920684425573865, + "loss": 0.7147, + "step": 60 + }, + { + "epoch": 0.53, + "grad_norm": 1.792511224746704, + "learning_rate": 0.0001887791407807728, + "loss": 0.6567, + "step": 61 + }, + { + "epoch": 0.54, + "grad_norm": 1.798560619354248, + "learning_rate": 0.00018834362969986308, + "loss": 0.6262, + "step": 62 + }, + { + "epoch": 0.55, + "grad_norm": 1.9578020572662354, + "learning_rate": 0.00018790034931366072, + "loss": 0.669, + "step": 63 + }, + { + "epoch": 0.55, + "grad_norm": 1.8020246028900146, + "learning_rate": 0.00018744933860608183, + "loss": 0.5335, + "step": 64 + }, + { + "epoch": 0.56, + "grad_norm": 2.5128743648529053, + "learning_rate": 0.00018699063724087904, + "loss": 0.6338, + "step": 65 + }, + { + "epoch": 0.57, + "grad_norm": 2.0579841136932373, + "learning_rate": 0.0001865242855581534, + "loss": 0.7927, + "step": 66 + }, + { + "epoch": 0.58, + "grad_norm": 2.1528258323669434, + "learning_rate": 0.00018605032457080653, + "loss": 1.0926, + "step": 67 + }, + { + "epoch": 0.59, + "grad_norm": 1.609959363937378, + "learning_rate": 0.00018556879596093393, + "loss": 0.7551, + "step": 68 + }, + { + "epoch": 0.6, + "grad_norm": 1.6629923582077026, + "learning_rate": 0.00018507974207615917, + "loss": 0.8931, + "step": 69 + }, + { + "epoch": 0.61, + "grad_norm": 1.9914518594741821, + "learning_rate": 0.00018458320592590975, + "loss": 0.6009, + "step": 70 + }, + { + "epoch": 0.62, + "grad_norm": 1.7312421798706055, + "learning_rate": 0.00018407923117763462, + "loss": 1.0439, + "step": 71 + }, + { + "epoch": 0.62, + "grad_norm": 1.5664162635803223, + "learning_rate": 0.00018356786215296386, + "loss": 0.8916, + "step": 72 + }, + { + "epoch": 0.63, + "grad_norm": 1.6064096689224243, + "learning_rate": 0.000183049143823811, + "loss": 0.526, + "step": 73 + }, + { + "epoch": 0.64, + "grad_norm": 1.5315232276916504, + "learning_rate": 0.00018252312180841776, + "loss": 0.7483, + "step": 74 + }, + { + "epoch": 0.65, + "grad_norm": 1.5293208360671997, + "learning_rate": 0.00018198984236734246, + "loss": 0.5264, + "step": 75 + }, + { + "epoch": 0.66, + "grad_norm": 1.3707643747329712, + "learning_rate": 0.00018144935239939144, + "loss": 0.5019, + "step": 76 + }, + { + "epoch": 0.67, + 
"grad_norm": 1.4324684143066406, + "learning_rate": 0.00018090169943749476, + "loss": 0.847, + "step": 77 + }, + { + "epoch": 0.68, + "grad_norm": 1.9568486213684082, + "learning_rate": 0.00018034693164452578, + "loss": 0.8341, + "step": 78 + }, + { + "epoch": 0.68, + "grad_norm": 2.0321366786956787, + "learning_rate": 0.0001797850978090658, + "loss": 0.8289, + "step": 79 + }, + { + "epoch": 0.69, + "grad_norm": 1.8444325923919678, + "learning_rate": 0.00017921624734111292, + "loss": 0.6042, + "step": 80 + }, + { + "epoch": 0.7, + "grad_norm": 1.4516267776489258, + "learning_rate": 0.0001786404302677374, + "loss": 0.7954, + "step": 81 + }, + { + "epoch": 0.71, + "grad_norm": 1.318422794342041, + "learning_rate": 0.0001780576972286813, + "loss": 0.7394, + "step": 82 + }, + { + "epoch": 0.72, + "grad_norm": 1.456667423248291, + "learning_rate": 0.0001774680994719057, + "loss": 0.7574, + "step": 83 + }, + { + "epoch": 0.73, + "grad_norm": 1.3297538757324219, + "learning_rate": 0.00017687168884908316, + "loss": 0.6806, + "step": 84 + }, + { + "epoch": 0.74, + "grad_norm": 1.8390334844589233, + "learning_rate": 0.0001762685178110382, + "loss": 0.8611, + "step": 85 + }, + { + "epoch": 0.75, + "grad_norm": 1.587986707687378, + "learning_rate": 0.00017565863940313415, + "loss": 0.6769, + "step": 86 + }, + { + "epoch": 0.75, + "grad_norm": 1.8962986469268799, + "learning_rate": 0.00017504210726060828, + "loss": 0.6422, + "step": 87 + }, + { + "epoch": 0.75, + "eval_loss": 0.6252095699310303, + "eval_runtime": 10.1853, + "eval_samples_per_second": 7.167, + "eval_steps_per_second": 1.865, + "step": 87 + }, + { + "epoch": 0.76, + "grad_norm": 2.887190580368042, + "learning_rate": 0.00017441897560385491, + "loss": 0.5887, + "step": 88 + }, + { + "epoch": 0.77, + "grad_norm": 2.028211832046509, + "learning_rate": 0.00017378929923365704, + "loss": 0.6192, + "step": 89 + }, + { + "epoch": 0.78, + "grad_norm": 1.2805747985839844, + "learning_rate": 0.0001731531335263669, + "loss": 0.5369, + "step": 90 + }, + { + "epoch": 0.79, + "grad_norm": 1.5532221794128418, + "learning_rate": 0.00017251053442903595, + "loss": 0.6197, + "step": 91 + }, + { + "epoch": 0.8, + "grad_norm": 1.1624884605407715, + "learning_rate": 0.00017186155845449466, + "loss": 0.5553, + "step": 92 + }, + { + "epoch": 0.81, + "grad_norm": 1.3367836475372314, + "learning_rate": 0.0001712062626763825, + "loss": 0.7564, + "step": 93 + }, + { + "epoch": 0.82, + "grad_norm": 1.3319028615951538, + "learning_rate": 0.00017054470472412873, + "loss": 0.714, + "step": 94 + }, + { + "epoch": 0.82, + "grad_norm": 1.060636043548584, + "learning_rate": 0.00016987694277788417, + "loss": 0.5233, + "step": 95 + }, + { + "epoch": 0.83, + "grad_norm": 2.4481117725372314, + "learning_rate": 0.0001692030355634046, + "loss": 0.5747, + "step": 96 + }, + { + "epoch": 0.84, + "grad_norm": 1.0515168905258179, + "learning_rate": 0.00016852304234688626, + "loss": 0.5198, + "step": 97 + }, + { + "epoch": 0.85, + "grad_norm": 1.1427360773086548, + "learning_rate": 0.0001678370229297535, + "loss": 0.6179, + "step": 98 + }, + { + "epoch": 0.86, + "grad_norm": 1.7050713300704956, + "learning_rate": 0.00016714503764339987, + "loss": 0.3758, + "step": 99 + }, + { + "epoch": 0.87, + "grad_norm": 1.5861639976501465, + "learning_rate": 0.00016644714734388217, + "loss": 0.7054, + "step": 100 + }, + { + "epoch": 0.88, + "grad_norm": 1.2969304323196411, + "learning_rate": 0.0001657434134065686, + "loss": 0.5552, + "step": 101 + }, + { + "epoch": 0.88, + "grad_norm": 
1.33128821849823, + "learning_rate": 0.00016503389772074104, + "loss": 0.681, + "step": 102 + }, + { + "epoch": 0.89, + "grad_norm": 1.1912298202514648, + "learning_rate": 0.00016431866268415237, + "loss": 0.6007, + "step": 103 + }, + { + "epoch": 0.9, + "grad_norm": 1.2182543277740479, + "learning_rate": 0.00016359777119753885, + "loss": 0.5058, + "step": 104 + }, + { + "epoch": 0.91, + "grad_norm": 1.2563199996948242, + "learning_rate": 0.0001628712866590885, + "loss": 0.5687, + "step": 105 + }, + { + "epoch": 0.92, + "grad_norm": 1.0571441650390625, + "learning_rate": 0.00016213927295886547, + "loss": 0.4943, + "step": 106 + }, + { + "epoch": 0.93, + "grad_norm": 1.2992205619812012, + "learning_rate": 0.00016140179447319132, + "loss": 0.4525, + "step": 107 + }, + { + "epoch": 0.94, + "grad_norm": 1.0483875274658203, + "learning_rate": 0.00016065891605898357, + "loss": 0.5543, + "step": 108 + }, + { + "epoch": 0.95, + "grad_norm": 1.2245246171951294, + "learning_rate": 0.00015991070304805183, + "loss": 0.5581, + "step": 109 + }, + { + "epoch": 0.95, + "grad_norm": 1.2341938018798828, + "learning_rate": 0.00015915722124135227, + "loss": 0.6072, + "step": 110 + }, + { + "epoch": 0.96, + "grad_norm": 1.4028526544570923, + "learning_rate": 0.00015839853690320074, + "loss": 0.6442, + "step": 111 + }, + { + "epoch": 0.97, + "grad_norm": 2.1308200359344482, + "learning_rate": 0.00015763471675544547, + "loss": 0.6541, + "step": 112 + }, + { + "epoch": 0.98, + "grad_norm": 1.8692742586135864, + "learning_rate": 0.00015686582797159893, + "loss": 0.7302, + "step": 113 + }, + { + "epoch": 0.99, + "grad_norm": 1.4890114068984985, + "learning_rate": 0.00015609193817093058, + "loss": 0.7293, + "step": 114 + }, + { + "epoch": 1.0, + "grad_norm": 1.059873342514038, + "learning_rate": 0.00015531311541251995, + "loss": 0.2306, + "step": 115 + }, + { + "epoch": 1.01, + "grad_norm": 1.476172685623169, + "learning_rate": 0.00015452942818927143, + "loss": 0.4316, + "step": 116 + }, + { + "epoch": 1.01, + "eval_loss": 0.5628204941749573, + "eval_runtime": 10.197, + "eval_samples_per_second": 7.159, + "eval_steps_per_second": 1.863, + "step": 116 + }, + { + "epoch": 1.01, + "grad_norm": 1.5941603183746338, + "learning_rate": 0.00015374094542189054, + "loss": 0.6178, + "step": 117 + }, + { + "epoch": 1.02, + "grad_norm": 1.3935587406158447, + "learning_rate": 0.000152947736452823, + "loss": 0.5188, + "step": 118 + }, + { + "epoch": 1.03, + "grad_norm": 1.4468976259231567, + "learning_rate": 0.0001521498710401561, + "loss": 0.5853, + "step": 119 + }, + { + "epoch": 1.04, + "grad_norm": 1.547136664390564, + "learning_rate": 0.0001513474193514842, + "loss": 0.6912, + "step": 120 + }, + { + "epoch": 1.05, + "grad_norm": 1.5418950319290161, + "learning_rate": 0.0001505404519577379, + "loss": 0.426, + "step": 121 + }, + { + "epoch": 1.06, + "grad_norm": 1.4496601819992065, + "learning_rate": 0.00014972903982697744, + "loss": 0.4834, + "step": 122 + }, + { + "epoch": 1.07, + "grad_norm": 1.5139033794403076, + "learning_rate": 0.00014891325431815183, + "loss": 0.538, + "step": 123 + }, + { + "epoch": 1.08, + "grad_norm": 1.2647703886032104, + "learning_rate": 0.000148093167174823, + "loss": 0.5842, + "step": 124 + }, + { + "epoch": 1.08, + "grad_norm": 1.1801835298538208, + "learning_rate": 0.00014726885051885653, + "loss": 0.5099, + "step": 125 + }, + { + "epoch": 1.09, + "grad_norm": 1.692179799079895, + "learning_rate": 0.00014644037684407882, + "loss": 0.6802, + "step": 126 + }, + { + "epoch": 1.1, + "grad_norm": 
1.2686200141906738, + "learning_rate": 0.00014560781900990185, + "loss": 0.5672, + "step": 127 + }, + { + "epoch": 1.11, + "grad_norm": 1.7675485610961914, + "learning_rate": 0.00014477125023491536, + "loss": 0.4094, + "step": 128 + }, + { + "epoch": 1.12, + "grad_norm": 1.0968059301376343, + "learning_rate": 0.00014393074409044802, + "loss": 0.4518, + "step": 129 + }, + { + "epoch": 1.13, + "grad_norm": 1.3362330198287964, + "learning_rate": 0.00014308637449409706, + "loss": 0.6245, + "step": 130 + }, + { + "epoch": 1.14, + "grad_norm": 1.2707240581512451, + "learning_rate": 0.00014223821570322762, + "loss": 0.4208, + "step": 131 + }, + { + "epoch": 1.14, + "grad_norm": 1.2580605745315552, + "learning_rate": 0.0001413863423084424, + "loss": 0.4996, + "step": 132 + }, + { + "epoch": 1.15, + "grad_norm": 1.566819429397583, + "learning_rate": 0.00014053082922702183, + "loss": 0.4216, + "step": 133 + }, + { + "epoch": 1.16, + "grad_norm": 1.4758243560791016, + "learning_rate": 0.00013967175169633537, + "loss": 0.4964, + "step": 134 + }, + { + "epoch": 1.17, + "grad_norm": 0.8215718865394592, + "learning_rate": 0.00013880918526722497, + "loss": 0.2789, + "step": 135 + }, + { + "epoch": 1.18, + "grad_norm": 1.3133045434951782, + "learning_rate": 0.00013794320579736083, + "loss": 0.5141, + "step": 136 + }, + { + "epoch": 1.19, + "grad_norm": 1.0844050645828247, + "learning_rate": 0.00013707388944457006, + "loss": 0.3922, + "step": 137 + }, + { + "epoch": 1.2, + "grad_norm": 1.4484992027282715, + "learning_rate": 0.00013620131266013912, + "loss": 0.5752, + "step": 138 + }, + { + "epoch": 1.21, + "grad_norm": 1.2113739252090454, + "learning_rate": 0.00013532555218209036, + "loss": 0.4957, + "step": 139 + }, + { + "epoch": 1.21, + "grad_norm": 1.2045493125915527, + "learning_rate": 0.0001344466850284333, + "loss": 0.4831, + "step": 140 + }, + { + "epoch": 1.22, + "grad_norm": 1.3927645683288574, + "learning_rate": 0.0001335647884903915, + "loss": 0.5629, + "step": 141 + }, + { + "epoch": 1.23, + "grad_norm": 1.1210253238677979, + "learning_rate": 0.00013267994012560504, + "loss": 0.4893, + "step": 142 + }, + { + "epoch": 1.24, + "grad_norm": 1.6540066003799438, + "learning_rate": 0.00013179221775131005, + "loss": 0.7858, + "step": 143 + }, + { + "epoch": 1.25, + "grad_norm": 1.1413919925689697, + "learning_rate": 0.00013090169943749476, + "loss": 0.4032, + "step": 144 + }, + { + "epoch": 1.26, + "grad_norm": 1.1475608348846436, + "learning_rate": 0.0001300084635000341, + "loss": 0.4754, + "step": 145 + }, + { + "epoch": 1.26, + "eval_loss": 0.5256438255310059, + "eval_runtime": 10.3648, + "eval_samples_per_second": 7.043, + "eval_steps_per_second": 1.833, + "step": 145 + }, + { + "epoch": 1.27, + "grad_norm": 1.4064439535140991, + "learning_rate": 0.000129112588493802, + "loss": 0.4578, + "step": 146 + }, + { + "epoch": 1.27, + "grad_norm": 1.5755749940872192, + "learning_rate": 0.0001282141532057631, + "loss": 0.7122, + "step": 147 + }, + { + "epoch": 1.28, + "grad_norm": 1.5215011835098267, + "learning_rate": 0.0001273132366480438, + "loss": 0.508, + "step": 148 + }, + { + "epoch": 1.29, + "grad_norm": 1.2629594802856445, + "learning_rate": 0.00012640991805098367, + "loss": 0.4476, + "step": 149 + }, + { + "epoch": 1.3, + "grad_norm": 1.9542852640151978, + "learning_rate": 0.00012550427685616765, + "loss": 0.5941, + "step": 150 + }, + { + "epoch": 1.31, + "grad_norm": 1.3931288719177246, + "learning_rate": 0.00012459639270943944, + "loss": 0.5879, + "step": 151 + }, + { + "epoch": 1.32, + 
"grad_norm": 1.2423650026321411, + "learning_rate": 0.00012368634545389733, + "loss": 0.3622, + "step": 152 + }, + { + "epoch": 1.33, + "grad_norm": 1.406791090965271, + "learning_rate": 0.00012277421512287226, + "loss": 0.5997, + "step": 153 + }, + { + "epoch": 1.34, + "grad_norm": 3.076571464538574, + "learning_rate": 0.00012186008193288962, + "loss": 0.5418, + "step": 154 + }, + { + "epoch": 1.34, + "grad_norm": 1.2042341232299805, + "learning_rate": 0.00012094402627661447, + "loss": 0.4835, + "step": 155 + }, + { + "epoch": 1.35, + "grad_norm": 1.7455346584320068, + "learning_rate": 0.00012002612871578143, + "loss": 0.5839, + "step": 156 + }, + { + "epoch": 1.36, + "grad_norm": 1.2508723735809326, + "learning_rate": 0.00011910646997411001, + "loss": 0.2307, + "step": 157 + }, + { + "epoch": 1.37, + "grad_norm": 1.636293649673462, + "learning_rate": 0.00011818513093020513, + "loss": 0.6068, + "step": 158 + }, + { + "epoch": 1.38, + "grad_norm": 1.0142595767974854, + "learning_rate": 0.0001172621926104446, + "loss": 0.3736, + "step": 159 + }, + { + "epoch": 1.39, + "grad_norm": 1.6434544324874878, + "learning_rate": 0.00011633773618185302, + "loss": 0.7835, + "step": 160 + }, + { + "epoch": 1.4, + "grad_norm": 1.6137529611587524, + "learning_rate": 0.00011541184294496392, + "loss": 0.6048, + "step": 161 + }, + { + "epoch": 1.4, + "grad_norm": 1.9305659532546997, + "learning_rate": 0.00011448459432666961, + "loss": 0.6432, + "step": 162 + }, + { + "epoch": 1.41, + "grad_norm": 1.2869210243225098, + "learning_rate": 0.00011355607187306037, + "loss": 0.2576, + "step": 163 + }, + { + "epoch": 1.42, + "grad_norm": 0.986311674118042, + "learning_rate": 0.00011262635724225272, + "loss": 0.2711, + "step": 164 + }, + { + "epoch": 1.43, + "grad_norm": 1.6139075756072998, + "learning_rate": 0.00011169553219720828, + "loss": 0.3115, + "step": 165 + }, + { + "epoch": 1.44, + "grad_norm": 1.405561923980713, + "learning_rate": 0.00011076367859854304, + "loss": 0.7212, + "step": 166 + }, + { + "epoch": 1.45, + "grad_norm": 1.2157914638519287, + "learning_rate": 0.00010983087839732833, + "loss": 0.4097, + "step": 167 + }, + { + "epoch": 1.46, + "grad_norm": 1.6579598188400269, + "learning_rate": 0.00010889721362788361, + "loss": 0.3952, + "step": 168 + }, + { + "epoch": 1.47, + "grad_norm": 1.5846188068389893, + "learning_rate": 0.000107962766400562, + "loss": 0.5481, + "step": 169 + }, + { + "epoch": 1.47, + "grad_norm": 2.198352336883545, + "learning_rate": 0.0001070276188945293, + "loss": 0.5907, + "step": 170 + }, + { + "epoch": 1.48, + "grad_norm": 0.990231454372406, + "learning_rate": 0.00010609185335053669, + "loss": 0.2935, + "step": 171 + }, + { + "epoch": 1.49, + "grad_norm": 1.4345817565917969, + "learning_rate": 0.00010515555206368815, + "loss": 0.543, + "step": 172 + }, + { + "epoch": 1.5, + "grad_norm": 1.1659619808197021, + "learning_rate": 0.00010421879737620312, + "loss": 0.4425, + "step": 173 + }, + { + "epoch": 1.51, + "grad_norm": 1.2373487949371338, + "learning_rate": 0.000103281671670175, + "loss": 0.4847, + "step": 174 + }, + { + "epoch": 1.51, + "eval_loss": 0.5034880042076111, + "eval_runtime": 10.3942, + "eval_samples_per_second": 7.023, + "eval_steps_per_second": 1.828, + "step": 174 + }, + { + "epoch": 1.52, + "grad_norm": 1.248612403869629, + "learning_rate": 0.00010234425736032607, + "loss": 0.4114, + "step": 175 + }, + { + "epoch": 1.53, + "grad_norm": 1.0567479133605957, + "learning_rate": 0.0001014066368867596, + "loss": 0.3538, + "step": 176 + }, + { + "epoch": 1.53, + 
"grad_norm": 1.5678746700286865, + "learning_rate": 0.00010046889270770987, + "loss": 0.6153, + "step": 177 + }, + { + "epoch": 1.54, + "grad_norm": 1.9261131286621094, + "learning_rate": 9.953110729229017e-05, + "loss": 0.5507, + "step": 178 + }, + { + "epoch": 1.55, + "grad_norm": 1.5983983278274536, + "learning_rate": 9.859336311324041e-05, + "loss": 0.5178, + "step": 179 + }, + { + "epoch": 1.56, + "grad_norm": 1.6287596225738525, + "learning_rate": 9.765574263967396e-05, + "loss": 0.4269, + "step": 180 + }, + { + "epoch": 1.57, + "grad_norm": 1.7541674375534058, + "learning_rate": 9.671832832982502e-05, + "loss": 0.8199, + "step": 181 + }, + { + "epoch": 1.58, + "grad_norm": 1.1184483766555786, + "learning_rate": 9.57812026237969e-05, + "loss": 0.5602, + "step": 182 + }, + { + "epoch": 1.59, + "grad_norm": 0.8452370762825012, + "learning_rate": 9.484444793631186e-05, + "loss": 0.2324, + "step": 183 + }, + { + "epoch": 1.6, + "grad_norm": 1.1326618194580078, + "learning_rate": 9.390814664946331e-05, + "loss": 0.4516, + "step": 184 + }, + { + "epoch": 1.6, + "grad_norm": 1.736547589302063, + "learning_rate": 9.297238110547074e-05, + "loss": 0.6761, + "step": 185 + }, + { + "epoch": 1.61, + "grad_norm": 1.2101978063583374, + "learning_rate": 9.203723359943802e-05, + "loss": 0.4739, + "step": 186 + }, + { + "epoch": 1.62, + "grad_norm": 1.1914681196212769, + "learning_rate": 9.110278637211643e-05, + "loss": 0.4065, + "step": 187 + }, + { + "epoch": 1.63, + "grad_norm": 1.277013897895813, + "learning_rate": 9.016912160267168e-05, + "loss": 0.347, + "step": 188 + }, + { + "epoch": 1.64, + "grad_norm": 1.4066228866577148, + "learning_rate": 8.923632140145701e-05, + "loss": 0.471, + "step": 189 + }, + { + "epoch": 1.65, + "grad_norm": 1.3591418266296387, + "learning_rate": 8.830446780279176e-05, + "loss": 0.6158, + "step": 190 + }, + { + "epoch": 1.66, + "grad_norm": 1.1961108446121216, + "learning_rate": 8.73736427577473e-05, + "loss": 0.5877, + "step": 191 + }, + { + "epoch": 1.66, + "grad_norm": 1.0860059261322021, + "learning_rate": 8.644392812693968e-05, + "loss": 0.476, + "step": 192 + }, + { + "epoch": 1.67, + "grad_norm": 1.1880991458892822, + "learning_rate": 8.55154056733304e-05, + "loss": 0.3546, + "step": 193 + }, + { + "epoch": 1.68, + "grad_norm": 1.4166094064712524, + "learning_rate": 8.458815705503611e-05, + "loss": 0.5122, + "step": 194 + }, + { + "epoch": 1.69, + "grad_norm": 1.1290371417999268, + "learning_rate": 8.366226381814697e-05, + "loss": 0.5407, + "step": 195 + }, + { + "epoch": 1.7, + "grad_norm": 1.3259081840515137, + "learning_rate": 8.273780738955544e-05, + "loss": 0.4521, + "step": 196 + }, + { + "epoch": 1.71, + "grad_norm": 1.1714779138565063, + "learning_rate": 8.181486906979487e-05, + "loss": 0.5858, + "step": 197 + }, + { + "epoch": 1.72, + "grad_norm": 0.8648713231086731, + "learning_rate": 8.089353002589001e-05, + "loss": 0.3257, + "step": 198 + }, + { + "epoch": 1.73, + "grad_norm": 1.0609321594238281, + "learning_rate": 7.997387128421858e-05, + "loss": 0.5146, + "step": 199 + }, + { + "epoch": 1.73, + "grad_norm": 1.1280933618545532, + "learning_rate": 7.905597372338558e-05, + "loss": 0.4356, + "step": 200 + }, + { + "epoch": 1.74, + "grad_norm": 1.4397556781768799, + "learning_rate": 7.813991806711039e-05, + "loss": 0.4239, + "step": 201 + }, + { + "epoch": 1.75, + "grad_norm": 1.3741945028305054, + "learning_rate": 7.722578487712776e-05, + "loss": 0.4366, + "step": 202 + }, + { + "epoch": 1.76, + "grad_norm": 1.1792335510253906, + "learning_rate": 
7.631365454610273e-05, + "loss": 0.4473, + "step": 203 + }, + { + "epoch": 1.76, + "eval_loss": 0.48859503865242004, + "eval_runtime": 10.4015, + "eval_samples_per_second": 7.018, + "eval_steps_per_second": 1.827, + "step": 203 + }, + { + "epoch": 1.77, + "grad_norm": 1.2233651876449585, + "learning_rate": 7.540360729056058e-05, + "loss": 0.5488, + "step": 204 + }, + { + "epoch": 1.78, + "grad_norm": 1.4724386930465698, + "learning_rate": 7.449572314383237e-05, + "loss": 0.4788, + "step": 205 + }, + { + "epoch": 1.79, + "grad_norm": 0.9696942567825317, + "learning_rate": 7.359008194901632e-05, + "loss": 0.4268, + "step": 206 + }, + { + "epoch": 1.79, + "grad_norm": 1.8895478248596191, + "learning_rate": 7.268676335195623e-05, + "loss": 0.752, + "step": 207 + }, + { + "epoch": 1.8, + "grad_norm": 1.5549920797348022, + "learning_rate": 7.178584679423695e-05, + "loss": 0.2853, + "step": 208 + }, + { + "epoch": 1.81, + "grad_norm": 1.517907738685608, + "learning_rate": 7.088741150619803e-05, + "loss": 0.4255, + "step": 209 + }, + { + "epoch": 1.82, + "grad_norm": 1.0410192012786865, + "learning_rate": 6.999153649996595e-05, + "loss": 0.3361, + "step": 210 + }, + { + "epoch": 1.83, + "grad_norm": 1.5179475545883179, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6939, + "step": 211 + }, + { + "epoch": 1.84, + "grad_norm": 1.173094391822815, + "learning_rate": 6.820778224868998e-05, + "loss": 0.2382, + "step": 212 + }, + { + "epoch": 1.85, + "grad_norm": 1.077552318572998, + "learning_rate": 6.732005987439494e-05, + "loss": 0.4151, + "step": 213 + }, + { + "epoch": 1.86, + "grad_norm": 1.062132716178894, + "learning_rate": 6.643521150960854e-05, + "loss": 0.4051, + "step": 214 + }, + { + "epoch": 1.86, + "grad_norm": 0.9788100123405457, + "learning_rate": 6.555331497156672e-05, + "loss": 0.4699, + "step": 215 + }, + { + "epoch": 1.87, + "grad_norm": 1.193488359451294, + "learning_rate": 6.467444781790966e-05, + "loss": 0.5863, + "step": 216 + }, + { + "epoch": 1.88, + "grad_norm": 1.1614291667938232, + "learning_rate": 6.379868733986089e-05, + "loss": 0.4499, + "step": 217 + }, + { + "epoch": 1.89, + "grad_norm": 1.1370713710784912, + "learning_rate": 6.292611055542998e-05, + "loss": 0.5316, + "step": 218 + }, + { + "epoch": 1.9, + "grad_norm": 1.1239794492721558, + "learning_rate": 6.205679420263916e-05, + "loss": 0.4265, + "step": 219 + }, + { + "epoch": 1.91, + "grad_norm": 1.5763834714889526, + "learning_rate": 6.119081473277501e-05, + "loss": 0.6717, + "step": 220 + }, + { + "epoch": 1.92, + "grad_norm": 1.087745189666748, + "learning_rate": 6.032824830366466e-05, + "loss": 0.4285, + "step": 221 + }, + { + "epoch": 1.92, + "grad_norm": 1.432390809059143, + "learning_rate": 5.9469170772978186e-05, + "loss": 0.535, + "step": 222 + }, + { + "epoch": 1.93, + "grad_norm": 1.2738263607025146, + "learning_rate": 5.86136576915576e-05, + "loss": 0.5856, + "step": 223 + }, + { + "epoch": 1.94, + "grad_norm": 1.7233974933624268, + "learning_rate": 5.7761784296772395e-05, + "loss": 0.5099, + "step": 224 + }, + { + "epoch": 1.95, + "grad_norm": 1.0894044637680054, + "learning_rate": 5.691362550590297e-05, + "loss": 0.423, + "step": 225 + }, + { + "epoch": 1.96, + "grad_norm": 1.342265248298645, + "learning_rate": 5.606925590955199e-05, + "loss": 0.6104, + "step": 226 + }, + { + "epoch": 1.97, + "grad_norm": 1.0730869770050049, + "learning_rate": 5.522874976508463e-05, + "loss": 0.4497, + "step": 227 + }, + { + "epoch": 1.98, + "grad_norm": 1.2529950141906738, + "learning_rate": 
5.439218099009822e-05, + "loss": 0.5444, + "step": 228 + }, + { + "epoch": 1.99, + "grad_norm": 1.1404846906661987, + "learning_rate": 5.355962315592118e-05, + "loss": 0.4093, + "step": 229 + }, + { + "epoch": 1.99, + "grad_norm": 1.0623228549957275, + "learning_rate": 5.273114948114346e-05, + "loss": 0.493, + "step": 230 + } + ], + "logging_steps": 1, + "max_steps": 345, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 115, + "total_flos": 6.031819036164096e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-230/training_args.bin b/out/checkpoint-230/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1bf29f9aeb8d54142b45324e1e89c4134fa2826b --- /dev/null +++ b/out/checkpoint-230/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4783b55cb3ce48e21fc6968b7681826272dbf548a28fbbd59dcf516adf35655f +size 5688 diff --git a/out/checkpoint-345/README.md b/out/checkpoint-345/README.md new file mode 100644 index 0000000000000000000000000000000000000000..09b95f7bb1935f89566de0ae0601274fa95c48f2 --- /dev/null +++ b/out/checkpoint-345/README.md @@ -0,0 +1,202 @@ +--- +library_name: peft +base_model: mistralai/Mistral-7B-Instruct-v0.2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.9.0 \ No newline at end of file diff --git a/out/checkpoint-345/adapter_config.json b/out/checkpoint-345/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9b68237c5b29f907e3cc47e1a1c75ae55cd0de55 --- /dev/null +++ b/out/checkpoint-345/adapter_config.json @@ -0,0 +1,28 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/out/checkpoint-345/adapter_model.safetensors b/out/checkpoint-345/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e1d9efb1165df78cbac61a27425e92c6201671f --- /dev/null +++ b/out/checkpoint-345/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e70061bca2c882df4379f2020bc2978d21cbe4015353e0ad37cc9fe032a4bf9 +size 13648432 diff --git a/out/checkpoint-345/optimizer.pt b/out/checkpoint-345/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23313016ac4eb8ae7fb5298d62594ee1500e9e12 --- /dev/null +++ b/out/checkpoint-345/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b2b33a3fd6d340108dee7bcd44f2199f508b2b027ea4658af414c6ad2791a4d +size 6979258 diff --git a/out/checkpoint-345/rng_state_0.pth b/out/checkpoint-345/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4864be55249a726ebb997c0951cd6d902c74163 --- /dev/null +++ b/out/checkpoint-345/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:011b2a977a5d7997d5f427d994a59fe803f5c49dd2a36427f8b64ca0068e2552 +size 14512 diff --git a/out/checkpoint-345/rng_state_1.pth b/out/checkpoint-345/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..7df446bb7ccc0c952be70d3a048d76ca46d480d6 --- /dev/null +++ b/out/checkpoint-345/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4433a1f437be48f6d79e1745ba79f9ca7894b7f059e6b28ac13bdbc767e2d52 +size 14512 diff --git a/out/checkpoint-345/scheduler.pt 
b/out/checkpoint-345/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..31adf50825ace365be74ddd27c4838db29560269 --- /dev/null +++ b/out/checkpoint-345/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:146245e3e68d5e2fb3f88043ab0f9991b7004c9481faa9738a5800b66cdf9509 +size 1064 diff --git a/out/checkpoint-345/special_tokens_map.json b/out/checkpoint-345/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/checkpoint-345/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/checkpoint-345/tokenizer.model b/out/checkpoint-345/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/checkpoint-345/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/checkpoint-345/tokenizer_config.json b/out/checkpoint-345/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/checkpoint-345/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/checkpoint-345/trainer_state.json b/out/checkpoint-345/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9100f8007857b5d8a881fb80948338ab3929732d --- /dev/null +++ b/out/checkpoint-345/trainer_state.json @@ -0,0 +1,2532 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.991329479768786, + "eval_steps": 29, + "global_step": 345, + "is_hyper_param_search": 
false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "grad_norm": 4.158745288848877, + "learning_rate": 2e-05, + "loss": 3.2348, + "step": 1 + }, + { + "epoch": 0.01, + "eval_loss": 3.0255825519561768, + "eval_runtime": 10.0801, + "eval_samples_per_second": 7.242, + "eval_steps_per_second": 1.885, + "step": 1 + }, + { + "epoch": 0.02, + "grad_norm": 3.48564076423645, + "learning_rate": 4e-05, + "loss": 3.0591, + "step": 2 + }, + { + "epoch": 0.03, + "grad_norm": 4.051155090332031, + "learning_rate": 6e-05, + "loss": 3.345, + "step": 3 + }, + { + "epoch": 0.03, + "grad_norm": 2.94571852684021, + "learning_rate": 8e-05, + "loss": 2.9206, + "step": 4 + }, + { + "epoch": 0.04, + "grad_norm": 3.961529493331909, + "learning_rate": 0.0001, + "loss": 3.0326, + "step": 5 + }, + { + "epoch": 0.05, + "grad_norm": 2.709989547729492, + "learning_rate": 0.00012, + "loss": 2.6911, + "step": 6 + }, + { + "epoch": 0.06, + "grad_norm": 2.7331080436706543, + "learning_rate": 0.00014, + "loss": 2.5443, + "step": 7 + }, + { + "epoch": 0.07, + "grad_norm": 3.204464912414551, + "learning_rate": 0.00016, + "loss": 2.5651, + "step": 8 + }, + { + "epoch": 0.08, + "grad_norm": 2.8977890014648438, + "learning_rate": 0.00018, + "loss": 2.5079, + "step": 9 + }, + { + "epoch": 0.09, + "grad_norm": 2.810033082962036, + "learning_rate": 0.0002, + "loss": 2.2729, + "step": 10 + }, + { + "epoch": 0.1, + "grad_norm": 2.0960497856140137, + "learning_rate": 0.00019999560279257314, + "loss": 2.2163, + "step": 11 + }, + { + "epoch": 0.1, + "grad_norm": 2.17856502532959, + "learning_rate": 0.0001999824115570012, + "loss": 2.1658, + "step": 12 + }, + { + "epoch": 0.11, + "grad_norm": 1.9092390537261963, + "learning_rate": 0.00019996042745337617, + "loss": 2.0652, + "step": 13 + }, + { + "epoch": 0.12, + "grad_norm": 2.242626905441284, + "learning_rate": 0.0001999296524150713, + "loss": 1.9754, + "step": 14 + }, + { + "epoch": 0.13, + "grad_norm": 2.0627095699310303, + "learning_rate": 0.00019989008914857116, + "loss": 1.9169, + "step": 15 + }, + { + "epoch": 0.14, + "grad_norm": 2.00533390045166, + "learning_rate": 0.00019984174113323353, + "loss": 1.756, + "step": 16 + }, + { + "epoch": 0.15, + "grad_norm": 1.5564405918121338, + "learning_rate": 0.00019978461262098343, + "loss": 1.8304, + "step": 17 + }, + { + "epoch": 0.16, + "grad_norm": 2.127427339553833, + "learning_rate": 0.00019971870863593925, + "loss": 1.9093, + "step": 18 + }, + { + "epoch": 0.16, + "grad_norm": 1.4882323741912842, + "learning_rate": 0.00019964403497397084, + "loss": 1.7579, + "step": 19 + }, + { + "epoch": 0.17, + "grad_norm": 1.4289195537567139, + "learning_rate": 0.00019956059820218982, + "loss": 1.5739, + "step": 20 + }, + { + "epoch": 0.18, + "grad_norm": 1.2495356798171997, + "learning_rate": 0.00019946840565837203, + "loss": 1.5548, + "step": 21 + }, + { + "epoch": 0.19, + "grad_norm": 1.476159691810608, + "learning_rate": 0.00019936746545031223, + "loss": 1.5918, + "step": 22 + }, + { + "epoch": 0.2, + "grad_norm": 1.399651050567627, + "learning_rate": 0.0001992577864551111, + "loss": 1.5577, + "step": 23 + }, + { + "epoch": 0.21, + "grad_norm": 1.3828766345977783, + "learning_rate": 0.0001991393783183945, + "loss": 1.5168, + "step": 24 + }, + { + "epoch": 0.22, + "grad_norm": 1.4390034675598145, + "learning_rate": 0.0001990122514534651, + "loss": 1.511, + "step": 25 + }, + { + "epoch": 0.23, + "grad_norm": 1.481458306312561, + "learning_rate": 0.00019887641704038688, + "loss": 1.4448, 
+ "step": 26 + }, + { + "epoch": 0.23, + "grad_norm": 1.4939665794372559, + "learning_rate": 0.00019873188702500163, + "loss": 1.4146, + "step": 27 + }, + { + "epoch": 0.24, + "grad_norm": 1.5099453926086426, + "learning_rate": 0.00019857867411787847, + "loss": 1.4192, + "step": 28 + }, + { + "epoch": 0.25, + "grad_norm": 1.4652291536331177, + "learning_rate": 0.00019841679179319606, + "loss": 1.2936, + "step": 29 + }, + { + "epoch": 0.25, + "eval_loss": 1.2550688982009888, + "eval_runtime": 10.1422, + "eval_samples_per_second": 7.198, + "eval_steps_per_second": 1.873, + "step": 29 + }, + { + "epoch": 0.26, + "grad_norm": 1.5150835514068604, + "learning_rate": 0.0001982462542875576, + "loss": 1.3272, + "step": 30 + }, + { + "epoch": 0.27, + "grad_norm": 1.4407225847244263, + "learning_rate": 0.00019806707659873887, + "loss": 1.1821, + "step": 31 + }, + { + "epoch": 0.28, + "grad_norm": 1.6642241477966309, + "learning_rate": 0.0001978792744843691, + "loss": 1.391, + "step": 32 + }, + { + "epoch": 0.29, + "grad_norm": 1.5081162452697754, + "learning_rate": 0.00019768286446054532, + "loss": 1.296, + "step": 33 + }, + { + "epoch": 0.29, + "grad_norm": 1.2953822612762451, + "learning_rate": 0.0001974778638003799, + "loss": 1.1308, + "step": 34 + }, + { + "epoch": 0.3, + "grad_norm": 1.574062466621399, + "learning_rate": 0.0001972642905324813, + "loss": 1.2351, + "step": 35 + }, + { + "epoch": 0.31, + "grad_norm": 1.355309247970581, + "learning_rate": 0.00019704216343936873, + "loss": 1.0967, + "step": 36 + }, + { + "epoch": 0.32, + "grad_norm": 1.5858274698257446, + "learning_rate": 0.00019681150205582025, + "loss": 1.1853, + "step": 37 + }, + { + "epoch": 0.33, + "grad_norm": 1.1243816614151, + "learning_rate": 0.00019657232666715486, + "loss": 1.0613, + "step": 38 + }, + { + "epoch": 0.34, + "grad_norm": 1.0661962032318115, + "learning_rate": 0.00019632465830744846, + "loss": 1.0101, + "step": 39 + }, + { + "epoch": 0.35, + "grad_norm": 1.246745228767395, + "learning_rate": 0.000196068518757684, + "loss": 1.0325, + "step": 40 + }, + { + "epoch": 0.36, + "grad_norm": 1.2067025899887085, + "learning_rate": 0.00019580393054383622, + "loss": 0.9933, + "step": 41 + }, + { + "epoch": 0.36, + "grad_norm": 1.4754862785339355, + "learning_rate": 0.00019553091693489018, + "loss": 1.1773, + "step": 42 + }, + { + "epoch": 0.37, + "grad_norm": 1.314193844795227, + "learning_rate": 0.00019524950194079534, + "loss": 0.9823, + "step": 43 + }, + { + "epoch": 0.38, + "grad_norm": 1.4316176176071167, + "learning_rate": 0.00019495971031035367, + "loss": 0.9911, + "step": 44 + }, + { + "epoch": 0.39, + "grad_norm": 1.5742014646530151, + "learning_rate": 0.00019466156752904343, + "loss": 0.949, + "step": 45 + }, + { + "epoch": 0.4, + "grad_norm": 1.5437729358673096, + "learning_rate": 0.00019435509981677762, + "loss": 0.9165, + "step": 46 + }, + { + "epoch": 0.41, + "grad_norm": 1.656130075454712, + "learning_rate": 0.00019404033412559826, + "loss": 0.8911, + "step": 47 + }, + { + "epoch": 0.42, + "grad_norm": 1.6358022689819336, + "learning_rate": 0.00019371729813730606, + "loss": 0.957, + "step": 48 + }, + { + "epoch": 0.42, + "grad_norm": 1.4374278783798218, + "learning_rate": 0.00019338602026102594, + "loss": 0.8144, + "step": 49 + }, + { + "epoch": 0.43, + "grad_norm": 1.449952244758606, + "learning_rate": 0.0001930465296307087, + "loss": 0.9469, + "step": 50 + }, + { + "epoch": 0.44, + "grad_norm": 1.4793896675109863, + "learning_rate": 0.00019269885610256865, + "loss": 0.8433, + "step": 51 + }, + { + 
"epoch": 0.45, + "grad_norm": 1.6898478269577026, + "learning_rate": 0.00019234303025245835, + "loss": 0.7861, + "step": 52 + }, + { + "epoch": 0.46, + "grad_norm": 1.6187032461166382, + "learning_rate": 0.0001919790833731791, + "loss": 1.0128, + "step": 53 + }, + { + "epoch": 0.47, + "grad_norm": 1.6767897605895996, + "learning_rate": 0.00019160704747172934, + "loss": 0.9005, + "step": 54 + }, + { + "epoch": 0.48, + "grad_norm": 1.5994288921356201, + "learning_rate": 0.00019122695526648968, + "loss": 0.8365, + "step": 55 + }, + { + "epoch": 0.49, + "grad_norm": 2.3374545574188232, + "learning_rate": 0.00019083884018434547, + "loss": 0.753, + "step": 56 + }, + { + "epoch": 0.49, + "grad_norm": 2.1330294609069824, + "learning_rate": 0.00019044273635774705, + "loss": 0.704, + "step": 57 + }, + { + "epoch": 0.5, + "grad_norm": 2.139307737350464, + "learning_rate": 0.00019003867862170832, + "loss": 0.9394, + "step": 58 + }, + { + "epoch": 0.5, + "eval_loss": 0.7861204147338867, + "eval_runtime": 10.1594, + "eval_samples_per_second": 7.185, + "eval_steps_per_second": 1.87, + "step": 58 + }, + { + "epoch": 0.51, + "grad_norm": 1.8726176023483276, + "learning_rate": 0.00018962670251074275, + "loss": 0.8582, + "step": 59 + }, + { + "epoch": 0.52, + "grad_norm": 2.6511785984039307, + "learning_rate": 0.00018920684425573865, + "loss": 0.7147, + "step": 60 + }, + { + "epoch": 0.53, + "grad_norm": 1.792511224746704, + "learning_rate": 0.0001887791407807728, + "loss": 0.6567, + "step": 61 + }, + { + "epoch": 0.54, + "grad_norm": 1.798560619354248, + "learning_rate": 0.00018834362969986308, + "loss": 0.6262, + "step": 62 + }, + { + "epoch": 0.55, + "grad_norm": 1.9578020572662354, + "learning_rate": 0.00018790034931366072, + "loss": 0.669, + "step": 63 + }, + { + "epoch": 0.55, + "grad_norm": 1.8020246028900146, + "learning_rate": 0.00018744933860608183, + "loss": 0.5335, + "step": 64 + }, + { + "epoch": 0.56, + "grad_norm": 2.5128743648529053, + "learning_rate": 0.00018699063724087904, + "loss": 0.6338, + "step": 65 + }, + { + "epoch": 0.57, + "grad_norm": 2.0579841136932373, + "learning_rate": 0.0001865242855581534, + "loss": 0.7927, + "step": 66 + }, + { + "epoch": 0.58, + "grad_norm": 2.1528258323669434, + "learning_rate": 0.00018605032457080653, + "loss": 1.0926, + "step": 67 + }, + { + "epoch": 0.59, + "grad_norm": 1.609959363937378, + "learning_rate": 0.00018556879596093393, + "loss": 0.7551, + "step": 68 + }, + { + "epoch": 0.6, + "grad_norm": 1.6629923582077026, + "learning_rate": 0.00018507974207615917, + "loss": 0.8931, + "step": 69 + }, + { + "epoch": 0.61, + "grad_norm": 1.9914518594741821, + "learning_rate": 0.00018458320592590975, + "loss": 0.6009, + "step": 70 + }, + { + "epoch": 0.62, + "grad_norm": 1.7312421798706055, + "learning_rate": 0.00018407923117763462, + "loss": 1.0439, + "step": 71 + }, + { + "epoch": 0.62, + "grad_norm": 1.5664162635803223, + "learning_rate": 0.00018356786215296386, + "loss": 0.8916, + "step": 72 + }, + { + "epoch": 0.63, + "grad_norm": 1.6064096689224243, + "learning_rate": 0.000183049143823811, + "loss": 0.526, + "step": 73 + }, + { + "epoch": 0.64, + "grad_norm": 1.5315232276916504, + "learning_rate": 0.00018252312180841776, + "loss": 0.7483, + "step": 74 + }, + { + "epoch": 0.65, + "grad_norm": 1.5293208360671997, + "learning_rate": 0.00018198984236734246, + "loss": 0.5264, + "step": 75 + }, + { + "epoch": 0.66, + "grad_norm": 1.3707643747329712, + "learning_rate": 0.00018144935239939144, + "loss": 0.5019, + "step": 76 + }, + { + "epoch": 0.67, + 
"grad_norm": 1.4324684143066406, + "learning_rate": 0.00018090169943749476, + "loss": 0.847, + "step": 77 + }, + { + "epoch": 0.68, + "grad_norm": 1.9568486213684082, + "learning_rate": 0.00018034693164452578, + "loss": 0.8341, + "step": 78 + }, + { + "epoch": 0.68, + "grad_norm": 2.0321366786956787, + "learning_rate": 0.0001797850978090658, + "loss": 0.8289, + "step": 79 + }, + { + "epoch": 0.69, + "grad_norm": 1.8444325923919678, + "learning_rate": 0.00017921624734111292, + "loss": 0.6042, + "step": 80 + }, + { + "epoch": 0.7, + "grad_norm": 1.4516267776489258, + "learning_rate": 0.0001786404302677374, + "loss": 0.7954, + "step": 81 + }, + { + "epoch": 0.71, + "grad_norm": 1.318422794342041, + "learning_rate": 0.0001780576972286813, + "loss": 0.7394, + "step": 82 + }, + { + "epoch": 0.72, + "grad_norm": 1.456667423248291, + "learning_rate": 0.0001774680994719057, + "loss": 0.7574, + "step": 83 + }, + { + "epoch": 0.73, + "grad_norm": 1.3297538757324219, + "learning_rate": 0.00017687168884908316, + "loss": 0.6806, + "step": 84 + }, + { + "epoch": 0.74, + "grad_norm": 1.8390334844589233, + "learning_rate": 0.0001762685178110382, + "loss": 0.8611, + "step": 85 + }, + { + "epoch": 0.75, + "grad_norm": 1.587986707687378, + "learning_rate": 0.00017565863940313415, + "loss": 0.6769, + "step": 86 + }, + { + "epoch": 0.75, + "grad_norm": 1.8962986469268799, + "learning_rate": 0.00017504210726060828, + "loss": 0.6422, + "step": 87 + }, + { + "epoch": 0.75, + "eval_loss": 0.6252095699310303, + "eval_runtime": 10.1853, + "eval_samples_per_second": 7.167, + "eval_steps_per_second": 1.865, + "step": 87 + }, + { + "epoch": 0.76, + "grad_norm": 2.887190580368042, + "learning_rate": 0.00017441897560385491, + "loss": 0.5887, + "step": 88 + }, + { + "epoch": 0.77, + "grad_norm": 2.028211832046509, + "learning_rate": 0.00017378929923365704, + "loss": 0.6192, + "step": 89 + }, + { + "epoch": 0.78, + "grad_norm": 1.2805747985839844, + "learning_rate": 0.0001731531335263669, + "loss": 0.5369, + "step": 90 + }, + { + "epoch": 0.79, + "grad_norm": 1.5532221794128418, + "learning_rate": 0.00017251053442903595, + "loss": 0.6197, + "step": 91 + }, + { + "epoch": 0.8, + "grad_norm": 1.1624884605407715, + "learning_rate": 0.00017186155845449466, + "loss": 0.5553, + "step": 92 + }, + { + "epoch": 0.81, + "grad_norm": 1.3367836475372314, + "learning_rate": 0.0001712062626763825, + "loss": 0.7564, + "step": 93 + }, + { + "epoch": 0.82, + "grad_norm": 1.3319028615951538, + "learning_rate": 0.00017054470472412873, + "loss": 0.714, + "step": 94 + }, + { + "epoch": 0.82, + "grad_norm": 1.060636043548584, + "learning_rate": 0.00016987694277788417, + "loss": 0.5233, + "step": 95 + }, + { + "epoch": 0.83, + "grad_norm": 2.4481117725372314, + "learning_rate": 0.0001692030355634046, + "loss": 0.5747, + "step": 96 + }, + { + "epoch": 0.84, + "grad_norm": 1.0515168905258179, + "learning_rate": 0.00016852304234688626, + "loss": 0.5198, + "step": 97 + }, + { + "epoch": 0.85, + "grad_norm": 1.1427360773086548, + "learning_rate": 0.0001678370229297535, + "loss": 0.6179, + "step": 98 + }, + { + "epoch": 0.86, + "grad_norm": 1.7050713300704956, + "learning_rate": 0.00016714503764339987, + "loss": 0.3758, + "step": 99 + }, + { + "epoch": 0.87, + "grad_norm": 1.5861639976501465, + "learning_rate": 0.00016644714734388217, + "loss": 0.7054, + "step": 100 + }, + { + "epoch": 0.88, + "grad_norm": 1.2969304323196411, + "learning_rate": 0.0001657434134065686, + "loss": 0.5552, + "step": 101 + }, + { + "epoch": 0.88, + "grad_norm": 
1.33128821849823, + "learning_rate": 0.00016503389772074104, + "loss": 0.681, + "step": 102 + }, + { + "epoch": 0.89, + "grad_norm": 1.1912298202514648, + "learning_rate": 0.00016431866268415237, + "loss": 0.6007, + "step": 103 + }, + { + "epoch": 0.9, + "grad_norm": 1.2182543277740479, + "learning_rate": 0.00016359777119753885, + "loss": 0.5058, + "step": 104 + }, + { + "epoch": 0.91, + "grad_norm": 1.2563199996948242, + "learning_rate": 0.0001628712866590885, + "loss": 0.5687, + "step": 105 + }, + { + "epoch": 0.92, + "grad_norm": 1.0571441650390625, + "learning_rate": 0.00016213927295886547, + "loss": 0.4943, + "step": 106 + }, + { + "epoch": 0.93, + "grad_norm": 1.2992205619812012, + "learning_rate": 0.00016140179447319132, + "loss": 0.4525, + "step": 107 + }, + { + "epoch": 0.94, + "grad_norm": 1.0483875274658203, + "learning_rate": 0.00016065891605898357, + "loss": 0.5543, + "step": 108 + }, + { + "epoch": 0.95, + "grad_norm": 1.2245246171951294, + "learning_rate": 0.00015991070304805183, + "loss": 0.5581, + "step": 109 + }, + { + "epoch": 0.95, + "grad_norm": 1.2341938018798828, + "learning_rate": 0.00015915722124135227, + "loss": 0.6072, + "step": 110 + }, + { + "epoch": 0.96, + "grad_norm": 1.4028526544570923, + "learning_rate": 0.00015839853690320074, + "loss": 0.6442, + "step": 111 + }, + { + "epoch": 0.97, + "grad_norm": 2.1308200359344482, + "learning_rate": 0.00015763471675544547, + "loss": 0.6541, + "step": 112 + }, + { + "epoch": 0.98, + "grad_norm": 1.8692742586135864, + "learning_rate": 0.00015686582797159893, + "loss": 0.7302, + "step": 113 + }, + { + "epoch": 0.99, + "grad_norm": 1.4890114068984985, + "learning_rate": 0.00015609193817093058, + "loss": 0.7293, + "step": 114 + }, + { + "epoch": 1.0, + "grad_norm": 1.059873342514038, + "learning_rate": 0.00015531311541251995, + "loss": 0.2306, + "step": 115 + }, + { + "epoch": 1.01, + "grad_norm": 1.476172685623169, + "learning_rate": 0.00015452942818927143, + "loss": 0.4316, + "step": 116 + }, + { + "epoch": 1.01, + "eval_loss": 0.5628204941749573, + "eval_runtime": 10.197, + "eval_samples_per_second": 7.159, + "eval_steps_per_second": 1.863, + "step": 116 + }, + { + "epoch": 1.01, + "grad_norm": 1.5941603183746338, + "learning_rate": 0.00015374094542189054, + "loss": 0.6178, + "step": 117 + }, + { + "epoch": 1.02, + "grad_norm": 1.3935587406158447, + "learning_rate": 0.000152947736452823, + "loss": 0.5188, + "step": 118 + }, + { + "epoch": 1.03, + "grad_norm": 1.4468976259231567, + "learning_rate": 0.0001521498710401561, + "loss": 0.5853, + "step": 119 + }, + { + "epoch": 1.04, + "grad_norm": 1.547136664390564, + "learning_rate": 0.0001513474193514842, + "loss": 0.6912, + "step": 120 + }, + { + "epoch": 1.05, + "grad_norm": 1.5418950319290161, + "learning_rate": 0.0001505404519577379, + "loss": 0.426, + "step": 121 + }, + { + "epoch": 1.06, + "grad_norm": 1.4496601819992065, + "learning_rate": 0.00014972903982697744, + "loss": 0.4834, + "step": 122 + }, + { + "epoch": 1.07, + "grad_norm": 1.5139033794403076, + "learning_rate": 0.00014891325431815183, + "loss": 0.538, + "step": 123 + }, + { + "epoch": 1.08, + "grad_norm": 1.2647703886032104, + "learning_rate": 0.000148093167174823, + "loss": 0.5842, + "step": 124 + }, + { + "epoch": 1.08, + "grad_norm": 1.1801835298538208, + "learning_rate": 0.00014726885051885653, + "loss": 0.5099, + "step": 125 + }, + { + "epoch": 1.09, + "grad_norm": 1.692179799079895, + "learning_rate": 0.00014644037684407882, + "loss": 0.6802, + "step": 126 + }, + { + "epoch": 1.1, + "grad_norm": 
1.2686200141906738, + "learning_rate": 0.00014560781900990185, + "loss": 0.5672, + "step": 127 + }, + { + "epoch": 1.11, + "grad_norm": 1.7675485610961914, + "learning_rate": 0.00014477125023491536, + "loss": 0.4094, + "step": 128 + }, + { + "epoch": 1.12, + "grad_norm": 1.0968059301376343, + "learning_rate": 0.00014393074409044802, + "loss": 0.4518, + "step": 129 + }, + { + "epoch": 1.13, + "grad_norm": 1.3362330198287964, + "learning_rate": 0.00014308637449409706, + "loss": 0.6245, + "step": 130 + }, + { + "epoch": 1.14, + "grad_norm": 1.2707240581512451, + "learning_rate": 0.00014223821570322762, + "loss": 0.4208, + "step": 131 + }, + { + "epoch": 1.14, + "grad_norm": 1.2580605745315552, + "learning_rate": 0.0001413863423084424, + "loss": 0.4996, + "step": 132 + }, + { + "epoch": 1.15, + "grad_norm": 1.566819429397583, + "learning_rate": 0.00014053082922702183, + "loss": 0.4216, + "step": 133 + }, + { + "epoch": 1.16, + "grad_norm": 1.4758243560791016, + "learning_rate": 0.00013967175169633537, + "loss": 0.4964, + "step": 134 + }, + { + "epoch": 1.17, + "grad_norm": 0.8215718865394592, + "learning_rate": 0.00013880918526722497, + "loss": 0.2789, + "step": 135 + }, + { + "epoch": 1.18, + "grad_norm": 1.3133045434951782, + "learning_rate": 0.00013794320579736083, + "loss": 0.5141, + "step": 136 + }, + { + "epoch": 1.19, + "grad_norm": 1.0844050645828247, + "learning_rate": 0.00013707388944457006, + "loss": 0.3922, + "step": 137 + }, + { + "epoch": 1.2, + "grad_norm": 1.4484992027282715, + "learning_rate": 0.00013620131266013912, + "loss": 0.5752, + "step": 138 + }, + { + "epoch": 1.21, + "grad_norm": 1.2113739252090454, + "learning_rate": 0.00013532555218209036, + "loss": 0.4957, + "step": 139 + }, + { + "epoch": 1.21, + "grad_norm": 1.2045493125915527, + "learning_rate": 0.0001344466850284333, + "loss": 0.4831, + "step": 140 + }, + { + "epoch": 1.22, + "grad_norm": 1.3927645683288574, + "learning_rate": 0.0001335647884903915, + "loss": 0.5629, + "step": 141 + }, + { + "epoch": 1.23, + "grad_norm": 1.1210253238677979, + "learning_rate": 0.00013267994012560504, + "loss": 0.4893, + "step": 142 + }, + { + "epoch": 1.24, + "grad_norm": 1.6540066003799438, + "learning_rate": 0.00013179221775131005, + "loss": 0.7858, + "step": 143 + }, + { + "epoch": 1.25, + "grad_norm": 1.1413919925689697, + "learning_rate": 0.00013090169943749476, + "loss": 0.4032, + "step": 144 + }, + { + "epoch": 1.26, + "grad_norm": 1.1475608348846436, + "learning_rate": 0.0001300084635000341, + "loss": 0.4754, + "step": 145 + }, + { + "epoch": 1.26, + "eval_loss": 0.5256438255310059, + "eval_runtime": 10.3648, + "eval_samples_per_second": 7.043, + "eval_steps_per_second": 1.833, + "step": 145 + }, + { + "epoch": 1.27, + "grad_norm": 1.4064439535140991, + "learning_rate": 0.000129112588493802, + "loss": 0.4578, + "step": 146 + }, + { + "epoch": 1.27, + "grad_norm": 1.5755749940872192, + "learning_rate": 0.0001282141532057631, + "loss": 0.7122, + "step": 147 + }, + { + "epoch": 1.28, + "grad_norm": 1.5215011835098267, + "learning_rate": 0.0001273132366480438, + "loss": 0.508, + "step": 148 + }, + { + "epoch": 1.29, + "grad_norm": 1.2629594802856445, + "learning_rate": 0.00012640991805098367, + "loss": 0.4476, + "step": 149 + }, + { + "epoch": 1.3, + "grad_norm": 1.9542852640151978, + "learning_rate": 0.00012550427685616765, + "loss": 0.5941, + "step": 150 + }, + { + "epoch": 1.31, + "grad_norm": 1.3931288719177246, + "learning_rate": 0.00012459639270943944, + "loss": 0.5879, + "step": 151 + }, + { + "epoch": 1.32, + 
"grad_norm": 1.2423650026321411, + "learning_rate": 0.00012368634545389733, + "loss": 0.3622, + "step": 152 + }, + { + "epoch": 1.33, + "grad_norm": 1.406791090965271, + "learning_rate": 0.00012277421512287226, + "loss": 0.5997, + "step": 153 + }, + { + "epoch": 1.34, + "grad_norm": 3.076571464538574, + "learning_rate": 0.00012186008193288962, + "loss": 0.5418, + "step": 154 + }, + { + "epoch": 1.34, + "grad_norm": 1.2042341232299805, + "learning_rate": 0.00012094402627661447, + "loss": 0.4835, + "step": 155 + }, + { + "epoch": 1.35, + "grad_norm": 1.7455346584320068, + "learning_rate": 0.00012002612871578143, + "loss": 0.5839, + "step": 156 + }, + { + "epoch": 1.36, + "grad_norm": 1.2508723735809326, + "learning_rate": 0.00011910646997411001, + "loss": 0.2307, + "step": 157 + }, + { + "epoch": 1.37, + "grad_norm": 1.636293649673462, + "learning_rate": 0.00011818513093020513, + "loss": 0.6068, + "step": 158 + }, + { + "epoch": 1.38, + "grad_norm": 1.0142595767974854, + "learning_rate": 0.0001172621926104446, + "loss": 0.3736, + "step": 159 + }, + { + "epoch": 1.39, + "grad_norm": 1.6434544324874878, + "learning_rate": 0.00011633773618185302, + "loss": 0.7835, + "step": 160 + }, + { + "epoch": 1.4, + "grad_norm": 1.6137529611587524, + "learning_rate": 0.00011541184294496392, + "loss": 0.6048, + "step": 161 + }, + { + "epoch": 1.4, + "grad_norm": 1.9305659532546997, + "learning_rate": 0.00011448459432666961, + "loss": 0.6432, + "step": 162 + }, + { + "epoch": 1.41, + "grad_norm": 1.2869210243225098, + "learning_rate": 0.00011355607187306037, + "loss": 0.2576, + "step": 163 + }, + { + "epoch": 1.42, + "grad_norm": 0.986311674118042, + "learning_rate": 0.00011262635724225272, + "loss": 0.2711, + "step": 164 + }, + { + "epoch": 1.43, + "grad_norm": 1.6139075756072998, + "learning_rate": 0.00011169553219720828, + "loss": 0.3115, + "step": 165 + }, + { + "epoch": 1.44, + "grad_norm": 1.405561923980713, + "learning_rate": 0.00011076367859854304, + "loss": 0.7212, + "step": 166 + }, + { + "epoch": 1.45, + "grad_norm": 1.2157914638519287, + "learning_rate": 0.00010983087839732833, + "loss": 0.4097, + "step": 167 + }, + { + "epoch": 1.46, + "grad_norm": 1.6579598188400269, + "learning_rate": 0.00010889721362788361, + "loss": 0.3952, + "step": 168 + }, + { + "epoch": 1.47, + "grad_norm": 1.5846188068389893, + "learning_rate": 0.000107962766400562, + "loss": 0.5481, + "step": 169 + }, + { + "epoch": 1.47, + "grad_norm": 2.198352336883545, + "learning_rate": 0.0001070276188945293, + "loss": 0.5907, + "step": 170 + }, + { + "epoch": 1.48, + "grad_norm": 0.990231454372406, + "learning_rate": 0.00010609185335053669, + "loss": 0.2935, + "step": 171 + }, + { + "epoch": 1.49, + "grad_norm": 1.4345817565917969, + "learning_rate": 0.00010515555206368815, + "loss": 0.543, + "step": 172 + }, + { + "epoch": 1.5, + "grad_norm": 1.1659619808197021, + "learning_rate": 0.00010421879737620312, + "loss": 0.4425, + "step": 173 + }, + { + "epoch": 1.51, + "grad_norm": 1.2373487949371338, + "learning_rate": 0.000103281671670175, + "loss": 0.4847, + "step": 174 + }, + { + "epoch": 1.51, + "eval_loss": 0.5034880042076111, + "eval_runtime": 10.3942, + "eval_samples_per_second": 7.023, + "eval_steps_per_second": 1.828, + "step": 174 + }, + { + "epoch": 1.52, + "grad_norm": 1.248612403869629, + "learning_rate": 0.00010234425736032607, + "loss": 0.4114, + "step": 175 + }, + { + "epoch": 1.53, + "grad_norm": 1.0567479133605957, + "learning_rate": 0.0001014066368867596, + "loss": 0.3538, + "step": 176 + }, + { + "epoch": 1.53, + 
"grad_norm": 1.5678746700286865, + "learning_rate": 0.00010046889270770987, + "loss": 0.6153, + "step": 177 + }, + { + "epoch": 1.54, + "grad_norm": 1.9261131286621094, + "learning_rate": 9.953110729229017e-05, + "loss": 0.5507, + "step": 178 + }, + { + "epoch": 1.55, + "grad_norm": 1.5983983278274536, + "learning_rate": 9.859336311324041e-05, + "loss": 0.5178, + "step": 179 + }, + { + "epoch": 1.56, + "grad_norm": 1.6287596225738525, + "learning_rate": 9.765574263967396e-05, + "loss": 0.4269, + "step": 180 + }, + { + "epoch": 1.57, + "grad_norm": 1.7541674375534058, + "learning_rate": 9.671832832982502e-05, + "loss": 0.8199, + "step": 181 + }, + { + "epoch": 1.58, + "grad_norm": 1.1184483766555786, + "learning_rate": 9.57812026237969e-05, + "loss": 0.5602, + "step": 182 + }, + { + "epoch": 1.59, + "grad_norm": 0.8452370762825012, + "learning_rate": 9.484444793631186e-05, + "loss": 0.2324, + "step": 183 + }, + { + "epoch": 1.6, + "grad_norm": 1.1326618194580078, + "learning_rate": 9.390814664946331e-05, + "loss": 0.4516, + "step": 184 + }, + { + "epoch": 1.6, + "grad_norm": 1.736547589302063, + "learning_rate": 9.297238110547074e-05, + "loss": 0.6761, + "step": 185 + }, + { + "epoch": 1.61, + "grad_norm": 1.2101978063583374, + "learning_rate": 9.203723359943802e-05, + "loss": 0.4739, + "step": 186 + }, + { + "epoch": 1.62, + "grad_norm": 1.1914681196212769, + "learning_rate": 9.110278637211643e-05, + "loss": 0.4065, + "step": 187 + }, + { + "epoch": 1.63, + "grad_norm": 1.277013897895813, + "learning_rate": 9.016912160267168e-05, + "loss": 0.347, + "step": 188 + }, + { + "epoch": 1.64, + "grad_norm": 1.4066228866577148, + "learning_rate": 8.923632140145701e-05, + "loss": 0.471, + "step": 189 + }, + { + "epoch": 1.65, + "grad_norm": 1.3591418266296387, + "learning_rate": 8.830446780279176e-05, + "loss": 0.6158, + "step": 190 + }, + { + "epoch": 1.66, + "grad_norm": 1.1961108446121216, + "learning_rate": 8.73736427577473e-05, + "loss": 0.5877, + "step": 191 + }, + { + "epoch": 1.66, + "grad_norm": 1.0860059261322021, + "learning_rate": 8.644392812693968e-05, + "loss": 0.476, + "step": 192 + }, + { + "epoch": 1.67, + "grad_norm": 1.1880991458892822, + "learning_rate": 8.55154056733304e-05, + "loss": 0.3546, + "step": 193 + }, + { + "epoch": 1.68, + "grad_norm": 1.4166094064712524, + "learning_rate": 8.458815705503611e-05, + "loss": 0.5122, + "step": 194 + }, + { + "epoch": 1.69, + "grad_norm": 1.1290371417999268, + "learning_rate": 8.366226381814697e-05, + "loss": 0.5407, + "step": 195 + }, + { + "epoch": 1.7, + "grad_norm": 1.3259081840515137, + "learning_rate": 8.273780738955544e-05, + "loss": 0.4521, + "step": 196 + }, + { + "epoch": 1.71, + "grad_norm": 1.1714779138565063, + "learning_rate": 8.181486906979487e-05, + "loss": 0.5858, + "step": 197 + }, + { + "epoch": 1.72, + "grad_norm": 0.8648713231086731, + "learning_rate": 8.089353002589001e-05, + "loss": 0.3257, + "step": 198 + }, + { + "epoch": 1.73, + "grad_norm": 1.0609321594238281, + "learning_rate": 7.997387128421858e-05, + "loss": 0.5146, + "step": 199 + }, + { + "epoch": 1.73, + "grad_norm": 1.1280933618545532, + "learning_rate": 7.905597372338558e-05, + "loss": 0.4356, + "step": 200 + }, + { + "epoch": 1.74, + "grad_norm": 1.4397556781768799, + "learning_rate": 7.813991806711039e-05, + "loss": 0.4239, + "step": 201 + }, + { + "epoch": 1.75, + "grad_norm": 1.3741945028305054, + "learning_rate": 7.722578487712776e-05, + "loss": 0.4366, + "step": 202 + }, + { + "epoch": 1.76, + "grad_norm": 1.1792335510253906, + "learning_rate": 
7.631365454610273e-05, + "loss": 0.4473, + "step": 203 + }, + { + "epoch": 1.76, + "eval_loss": 0.48859503865242004, + "eval_runtime": 10.4015, + "eval_samples_per_second": 7.018, + "eval_steps_per_second": 1.827, + "step": 203 + }, + { + "epoch": 1.77, + "grad_norm": 1.2233651876449585, + "learning_rate": 7.540360729056058e-05, + "loss": 0.5488, + "step": 204 + }, + { + "epoch": 1.78, + "grad_norm": 1.4724386930465698, + "learning_rate": 7.449572314383237e-05, + "loss": 0.4788, + "step": 205 + }, + { + "epoch": 1.79, + "grad_norm": 0.9696942567825317, + "learning_rate": 7.359008194901632e-05, + "loss": 0.4268, + "step": 206 + }, + { + "epoch": 1.79, + "grad_norm": 1.8895478248596191, + "learning_rate": 7.268676335195623e-05, + "loss": 0.752, + "step": 207 + }, + { + "epoch": 1.8, + "grad_norm": 1.5549920797348022, + "learning_rate": 7.178584679423695e-05, + "loss": 0.2853, + "step": 208 + }, + { + "epoch": 1.81, + "grad_norm": 1.517907738685608, + "learning_rate": 7.088741150619803e-05, + "loss": 0.4255, + "step": 209 + }, + { + "epoch": 1.82, + "grad_norm": 1.0410192012786865, + "learning_rate": 6.999153649996595e-05, + "loss": 0.3361, + "step": 210 + }, + { + "epoch": 1.83, + "grad_norm": 1.5179475545883179, + "learning_rate": 6.909830056250527e-05, + "loss": 0.6939, + "step": 211 + }, + { + "epoch": 1.84, + "grad_norm": 1.173094391822815, + "learning_rate": 6.820778224868998e-05, + "loss": 0.2382, + "step": 212 + }, + { + "epoch": 1.85, + "grad_norm": 1.077552318572998, + "learning_rate": 6.732005987439494e-05, + "loss": 0.4151, + "step": 213 + }, + { + "epoch": 1.86, + "grad_norm": 1.062132716178894, + "learning_rate": 6.643521150960854e-05, + "loss": 0.4051, + "step": 214 + }, + { + "epoch": 1.86, + "grad_norm": 0.9788100123405457, + "learning_rate": 6.555331497156672e-05, + "loss": 0.4699, + "step": 215 + }, + { + "epoch": 1.87, + "grad_norm": 1.193488359451294, + "learning_rate": 6.467444781790966e-05, + "loss": 0.5863, + "step": 216 + }, + { + "epoch": 1.88, + "grad_norm": 1.1614291667938232, + "learning_rate": 6.379868733986089e-05, + "loss": 0.4499, + "step": 217 + }, + { + "epoch": 1.89, + "grad_norm": 1.1370713710784912, + "learning_rate": 6.292611055542998e-05, + "loss": 0.5316, + "step": 218 + }, + { + "epoch": 1.9, + "grad_norm": 1.1239794492721558, + "learning_rate": 6.205679420263916e-05, + "loss": 0.4265, + "step": 219 + }, + { + "epoch": 1.91, + "grad_norm": 1.5763834714889526, + "learning_rate": 6.119081473277501e-05, + "loss": 0.6717, + "step": 220 + }, + { + "epoch": 1.92, + "grad_norm": 1.087745189666748, + "learning_rate": 6.032824830366466e-05, + "loss": 0.4285, + "step": 221 + }, + { + "epoch": 1.92, + "grad_norm": 1.432390809059143, + "learning_rate": 5.9469170772978186e-05, + "loss": 0.535, + "step": 222 + }, + { + "epoch": 1.93, + "grad_norm": 1.2738263607025146, + "learning_rate": 5.86136576915576e-05, + "loss": 0.5856, + "step": 223 + }, + { + "epoch": 1.94, + "grad_norm": 1.7233974933624268, + "learning_rate": 5.7761784296772395e-05, + "loss": 0.5099, + "step": 224 + }, + { + "epoch": 1.95, + "grad_norm": 1.0894044637680054, + "learning_rate": 5.691362550590297e-05, + "loss": 0.423, + "step": 225 + }, + { + "epoch": 1.96, + "grad_norm": 1.342265248298645, + "learning_rate": 5.606925590955199e-05, + "loss": 0.6104, + "step": 226 + }, + { + "epoch": 1.97, + "grad_norm": 1.0730869770050049, + "learning_rate": 5.522874976508463e-05, + "loss": 0.4497, + "step": 227 + }, + { + "epoch": 1.98, + "grad_norm": 1.2529950141906738, + "learning_rate": 
5.439218099009822e-05, + "loss": 0.5444, + "step": 228 + }, + { + "epoch": 1.99, + "grad_norm": 1.1404846906661987, + "learning_rate": 5.355962315592118e-05, + "loss": 0.4093, + "step": 229 + }, + { + "epoch": 1.99, + "grad_norm": 1.0623228549957275, + "learning_rate": 5.273114948114346e-05, + "loss": 0.493, + "step": 230 + }, + { + "epoch": 2.0, + "grad_norm": 1.1700286865234375, + "learning_rate": 5.190683282517701e-05, + "loss": 0.3995, + "step": 231 + }, + { + "epoch": 2.01, + "grad_norm": 1.3474769592285156, + "learning_rate": 5.108674568184822e-05, + "loss": 0.5926, + "step": 232 + }, + { + "epoch": 2.01, + "eval_loss": 0.4762152433395386, + "eval_runtime": 10.3875, + "eval_samples_per_second": 7.028, + "eval_steps_per_second": 1.829, + "step": 232 + }, + { + "epoch": 2.02, + "grad_norm": 0.9689212441444397, + "learning_rate": 5.0270960173022604e-05, + "loss": 0.3785, + "step": 233 + }, + { + "epoch": 2.03, + "grad_norm": 1.6816365718841553, + "learning_rate": 4.945954804226214e-05, + "loss": 0.5284, + "step": 234 + }, + { + "epoch": 2.04, + "grad_norm": 1.1274938583374023, + "learning_rate": 4.865258064851579e-05, + "loss": 0.3712, + "step": 235 + }, + { + "epoch": 2.05, + "grad_norm": 1.754917860031128, + "learning_rate": 4.785012895984397e-05, + "loss": 0.6694, + "step": 236 + }, + { + "epoch": 2.05, + "grad_norm": 1.5507243871688843, + "learning_rate": 4.705226354717703e-05, + "loss": 0.6641, + "step": 237 + }, + { + "epoch": 2.06, + "grad_norm": 1.3035473823547363, + "learning_rate": 4.6259054578109426e-05, + "loss": 0.4379, + "step": 238 + }, + { + "epoch": 2.07, + "grad_norm": 1.1499701738357544, + "learning_rate": 4.547057181072861e-05, + "loss": 0.3368, + "step": 239 + }, + { + "epoch": 2.08, + "grad_norm": 1.6347543001174927, + "learning_rate": 4.468688458748006e-05, + "loss": 0.7264, + "step": 240 + }, + { + "epoch": 2.09, + "grad_norm": 1.2663682699203491, + "learning_rate": 4.390806182906946e-05, + "loss": 0.4011, + "step": 241 + }, + { + "epoch": 2.1, + "grad_norm": 1.5727379322052002, + "learning_rate": 4.313417202840106e-05, + "loss": 0.4289, + "step": 242 + }, + { + "epoch": 2.11, + "grad_norm": 1.1615204811096191, + "learning_rate": 4.2365283244554545e-05, + "loss": 0.4438, + "step": 243 + }, + { + "epoch": 2.12, + "grad_norm": 1.6537014245986938, + "learning_rate": 4.1601463096799274e-05, + "loss": 0.4657, + "step": 244 + }, + { + "epoch": 2.12, + "grad_norm": 1.649515151977539, + "learning_rate": 4.084277875864776e-05, + "loss": 0.608, + "step": 245 + }, + { + "epoch": 2.13, + "grad_norm": 1.000108242034912, + "learning_rate": 4.008929695194819e-05, + "loss": 0.3033, + "step": 246 + }, + { + "epoch": 2.14, + "grad_norm": 1.3879984617233276, + "learning_rate": 3.9341083941016445e-05, + "loss": 0.5066, + "step": 247 + }, + { + "epoch": 2.15, + "grad_norm": 1.6241618394851685, + "learning_rate": 3.859820552680867e-05, + "loss": 0.4506, + "step": 248 + }, + { + "epoch": 2.16, + "grad_norm": 1.8910174369812012, + "learning_rate": 3.786072704113456e-05, + "loss": 0.4808, + "step": 249 + }, + { + "epoch": 2.17, + "grad_norm": 1.5988129377365112, + "learning_rate": 3.7128713340911535e-05, + "loss": 0.5178, + "step": 250 + }, + { + "epoch": 2.18, + "grad_norm": 1.2832223176956177, + "learning_rate": 3.640222880246117e-05, + "loss": 0.4625, + "step": 251 + }, + { + "epoch": 2.18, + "grad_norm": 1.1611508131027222, + "learning_rate": 3.568133731584767e-05, + "loss": 0.2785, + "step": 252 + }, + { + "epoch": 2.19, + "grad_norm": 1.6666474342346191, + "learning_rate": 
3.496610227925896e-05, + "loss": 0.4976, + "step": 253 + }, + { + "epoch": 2.2, + "grad_norm": 1.7112529277801514, + "learning_rate": 3.4256586593431407e-05, + "loss": 0.6395, + "step": 254 + }, + { + "epoch": 2.21, + "grad_norm": 1.5504542589187622, + "learning_rate": 3.355285265611784e-05, + "loss": 0.5283, + "step": 255 + }, + { + "epoch": 2.22, + "grad_norm": 1.4395564794540405, + "learning_rate": 3.2854962356600124e-05, + "loss": 0.4776, + "step": 256 + }, + { + "epoch": 2.23, + "grad_norm": 0.9323856234550476, + "learning_rate": 3.216297707024655e-05, + "loss": 0.2497, + "step": 257 + }, + { + "epoch": 2.24, + "grad_norm": 1.0921452045440674, + "learning_rate": 3.147695765311377e-05, + "loss": 0.3748, + "step": 258 + }, + { + "epoch": 2.25, + "grad_norm": 1.1914565563201904, + "learning_rate": 3.079696443659538e-05, + "loss": 0.556, + "step": 259 + }, + { + "epoch": 2.25, + "grad_norm": 1.1121900081634521, + "learning_rate": 3.0123057222115836e-05, + "loss": 0.3107, + "step": 260 + }, + { + "epoch": 2.26, + "grad_norm": 0.8936444520950317, + "learning_rate": 2.9455295275871298e-05, + "loss": 0.322, + "step": 261 + }, + { + "epoch": 2.26, + "eval_loss": 0.4724556505680084, + "eval_runtime": 10.3397, + "eval_samples_per_second": 7.06, + "eval_steps_per_second": 1.838, + "step": 261 + }, + { + "epoch": 2.27, + "grad_norm": 0.8120421767234802, + "learning_rate": 2.8793737323617553e-05, + "loss": 0.2761, + "step": 262 + }, + { + "epoch": 2.28, + "grad_norm": 1.2320939302444458, + "learning_rate": 2.8138441545505366e-05, + "loss": 0.2698, + "step": 263 + }, + { + "epoch": 2.29, + "grad_norm": 1.3531649112701416, + "learning_rate": 2.748946557096407e-05, + "loss": 0.4887, + "step": 264 + }, + { + "epoch": 2.3, + "grad_norm": 1.2009832859039307, + "learning_rate": 2.6846866473633125e-05, + "loss": 0.5149, + "step": 265 + }, + { + "epoch": 2.31, + "grad_norm": 1.0968409776687622, + "learning_rate": 2.6210700766342965e-05, + "loss": 0.3564, + "step": 266 + }, + { + "epoch": 2.32, + "grad_norm": 1.3388766050338745, + "learning_rate": 2.5581024396145116e-05, + "loss": 0.606, + "step": 267 + }, + { + "epoch": 2.32, + "grad_norm": 0.7783334255218506, + "learning_rate": 2.4957892739391765e-05, + "loss": 0.2155, + "step": 268 + }, + { + "epoch": 2.33, + "grad_norm": 0.9703002572059631, + "learning_rate": 2.434136059686587e-05, + "loss": 0.27, + "step": 269 + }, + { + "epoch": 2.34, + "grad_norm": 1.4595576524734497, + "learning_rate": 2.3731482188961818e-05, + "loss": 0.2436, + "step": 270 + }, + { + "epoch": 2.35, + "grad_norm": 1.117234706878662, + "learning_rate": 2.3128311150916826e-05, + "loss": 0.4425, + "step": 271 + }, + { + "epoch": 2.36, + "grad_norm": 0.7211378216743469, + "learning_rate": 2.253190052809434e-05, + "loss": 0.166, + "step": 272 + }, + { + "epoch": 2.37, + "grad_norm": 0.8681732416152954, + "learning_rate": 2.1942302771318712e-05, + "loss": 0.2515, + "step": 273 + }, + { + "epoch": 2.38, + "grad_norm": 0.8737204074859619, + "learning_rate": 2.135956973226262e-05, + "loss": 0.2724, + "step": 274 + }, + { + "epoch": 2.38, + "grad_norm": 1.0407475233078003, + "learning_rate": 2.0783752658887066e-05, + "loss": 0.3217, + "step": 275 + }, + { + "epoch": 2.39, + "grad_norm": 1.7831743955612183, + "learning_rate": 2.021490219093426e-05, + "loss": 0.6522, + "step": 276 + }, + { + "epoch": 2.4, + "grad_norm": 0.8861950039863586, + "learning_rate": 1.9653068355474214e-05, + "loss": 0.3559, + "step": 277 + }, + { + "epoch": 2.41, + "grad_norm": 0.8673079013824463, + "learning_rate": 
1.9098300562505266e-05, + "loss": 0.281, + "step": 278 + }, + { + "epoch": 2.42, + "grad_norm": 1.668893814086914, + "learning_rate": 1.8550647600608573e-05, + "loss": 0.5619, + "step": 279 + }, + { + "epoch": 2.43, + "grad_norm": 0.9803675413131714, + "learning_rate": 1.8010157632657543e-05, + "loss": 0.3759, + "step": 280 + }, + { + "epoch": 2.44, + "grad_norm": 1.2397831678390503, + "learning_rate": 1.7476878191582246e-05, + "loss": 0.4992, + "step": 281 + }, + { + "epoch": 2.45, + "grad_norm": 1.776524305343628, + "learning_rate": 1.6950856176189033e-05, + "loss": 0.5687, + "step": 282 + }, + { + "epoch": 2.45, + "grad_norm": 1.4720158576965332, + "learning_rate": 1.6432137847036145e-05, + "loss": 0.4643, + "step": 283 + }, + { + "epoch": 2.46, + "grad_norm": 1.0691683292388916, + "learning_rate": 1.5920768822365418e-05, + "loss": 0.3417, + "step": 284 + }, + { + "epoch": 2.47, + "grad_norm": 1.2835264205932617, + "learning_rate": 1.5416794074090258e-05, + "loss": 0.3619, + "step": 285 + }, + { + "epoch": 2.48, + "grad_norm": 0.9866151213645935, + "learning_rate": 1.4920257923840864e-05, + "loss": 0.4221, + "step": 286 + }, + { + "epoch": 2.49, + "grad_norm": 1.2654749155044556, + "learning_rate": 1.4431204039066082e-05, + "loss": 0.4701, + "step": 287 + }, + { + "epoch": 2.5, + "grad_norm": 0.9995457530021667, + "learning_rate": 1.3949675429193466e-05, + "loss": 0.3161, + "step": 288 + }, + { + "epoch": 2.51, + "grad_norm": 2.263254404067993, + "learning_rate": 1.347571444184661e-05, + "loss": 0.4427, + "step": 289 + }, + { + "epoch": 2.51, + "grad_norm": 8.058012962341309, + "learning_rate": 1.300936275912098e-05, + "loss": 0.8129, + "step": 290 + }, + { + "epoch": 2.51, + "eval_loss": 0.46935132145881653, + "eval_runtime": 10.3973, + "eval_samples_per_second": 7.021, + "eval_steps_per_second": 1.827, + "step": 290 + }, + { + "epoch": 2.52, + "grad_norm": 1.2110286951065063, + "learning_rate": 1.2550661393918217e-05, + "loss": 0.416, + "step": 291 + }, + { + "epoch": 2.53, + "grad_norm": 0.8822147250175476, + "learning_rate": 1.2099650686339303e-05, + "loss": 0.3524, + "step": 292 + }, + { + "epoch": 2.54, + "grad_norm": 1.5716207027435303, + "learning_rate": 1.1656370300136943e-05, + "loss": 0.7171, + "step": 293 + }, + { + "epoch": 2.55, + "grad_norm": 1.3153347969055176, + "learning_rate": 1.1220859219227232e-05, + "loss": 0.4808, + "step": 294 + }, + { + "epoch": 2.56, + "grad_norm": 0.842327356338501, + "learning_rate": 1.0793155744261351e-05, + "loss": 0.3027, + "step": 295 + }, + { + "epoch": 2.57, + "grad_norm": 1.5324808359146118, + "learning_rate": 1.0373297489257272e-05, + "loss": 0.5751, + "step": 296 + }, + { + "epoch": 2.58, + "grad_norm": 1.0970269441604614, + "learning_rate": 9.96132137829171e-06, + "loss": 0.486, + "step": 297 + }, + { + "epoch": 2.58, + "grad_norm": 1.1599726676940918, + "learning_rate": 9.557263642252945e-06, + "loss": 0.3954, + "step": 298 + }, + { + "epoch": 2.59, + "grad_norm": 1.4121760129928589, + "learning_rate": 9.161159815654574e-06, + "loss": 0.5946, + "step": 299 + }, + { + "epoch": 2.6, + "grad_norm": 1.1278576850891113, + "learning_rate": 8.773044733510338e-06, + "loss": 0.4209, + "step": 300 + }, + { + "epoch": 2.61, + "grad_norm": 1.4593509435653687, + "learning_rate": 8.392952528270659e-06, + "loss": 0.4017, + "step": 301 + }, + { + "epoch": 2.62, + "grad_norm": 0.9859544038772583, + "learning_rate": 8.020916626820919e-06, + "loss": 0.3557, + "step": 302 + }, + { + "epoch": 2.63, + "grad_norm": 1.0231760740280151, + "learning_rate": 
7.656969747541665e-06, + "loss": 0.2703, + "step": 303 + }, + { + "epoch": 2.64, + "grad_norm": 2.013821840286255, + "learning_rate": 7.301143897431339e-06, + "loss": 0.4877, + "step": 304 + }, + { + "epoch": 2.64, + "grad_norm": 0.8030815720558167, + "learning_rate": 6.953470369291348e-06, + "loss": 0.2235, + "step": 305 + }, + { + "epoch": 2.65, + "grad_norm": 1.060416340827942, + "learning_rate": 6.613979738974074e-06, + "loss": 0.3799, + "step": 306 + }, + { + "epoch": 2.66, + "grad_norm": 1.0030161142349243, + "learning_rate": 6.2827018626939624e-06, + "loss": 0.348, + "step": 307 + }, + { + "epoch": 2.67, + "grad_norm": 1.3924634456634521, + "learning_rate": 5.959665874401765e-06, + "loss": 0.5308, + "step": 308 + }, + { + "epoch": 2.68, + "grad_norm": 1.1038811206817627, + "learning_rate": 5.6449001832223905e-06, + "loss": 0.3892, + "step": 309 + }, + { + "epoch": 2.69, + "grad_norm": 0.842438280582428, + "learning_rate": 5.338432470956589e-06, + "loss": 0.2266, + "step": 310 + }, + { + "epoch": 2.7, + "grad_norm": 1.159005880355835, + "learning_rate": 5.040289689646338e-06, + "loss": 0.2989, + "step": 311 + }, + { + "epoch": 2.71, + "grad_norm": 1.4991456270217896, + "learning_rate": 4.750498059204677e-06, + "loss": 0.3922, + "step": 312 + }, + { + "epoch": 2.71, + "grad_norm": 3.4466562271118164, + "learning_rate": 4.469083065109825e-06, + "loss": 0.7433, + "step": 313 + }, + { + "epoch": 2.72, + "grad_norm": 0.9475330114364624, + "learning_rate": 4.196069456163787e-06, + "loss": 0.3257, + "step": 314 + }, + { + "epoch": 2.73, + "grad_norm": 1.5049362182617188, + "learning_rate": 3.931481242315993e-06, + "loss": 0.6033, + "step": 315 + }, + { + "epoch": 2.74, + "grad_norm": 1.116878628730774, + "learning_rate": 3.6753416925515593e-06, + "loss": 0.44, + "step": 316 + }, + { + "epoch": 2.75, + "grad_norm": 1.2098571062088013, + "learning_rate": 3.427673332845138e-06, + "loss": 0.4076, + "step": 317 + }, + { + "epoch": 2.76, + "grad_norm": 1.5905842781066895, + "learning_rate": 3.188497944179758e-06, + "loss": 0.4355, + "step": 318 + }, + { + "epoch": 2.77, + "grad_norm": 1.0729317665100098, + "learning_rate": 2.9578365606312665e-06, + "loss": 0.3698, + "step": 319 + }, + { + "epoch": 2.77, + "eval_loss": 0.4684131145477295, + "eval_runtime": 10.4099, + "eval_samples_per_second": 7.013, + "eval_steps_per_second": 1.825, + "step": 319 + }, + { + "epoch": 2.77, + "grad_norm": 1.1257330179214478, + "learning_rate": 2.735709467518699e-06, + "loss": 0.4413, + "step": 320 + }, + { + "epoch": 2.78, + "grad_norm": 0.9209752678871155, + "learning_rate": 2.5221361996200955e-06, + "loss": 0.3296, + "step": 321 + }, + { + "epoch": 2.79, + "grad_norm": 1.5466831922531128, + "learning_rate": 2.3171355394546624e-06, + "loss": 0.4308, + "step": 322 + }, + { + "epoch": 2.8, + "grad_norm": 1.1589947938919067, + "learning_rate": 2.120725515630906e-06, + "loss": 0.4266, + "step": 323 + }, + { + "epoch": 2.81, + "grad_norm": 0.8329200744628906, + "learning_rate": 1.932923401261133e-06, + "loss": 0.251, + "step": 324 + }, + { + "epoch": 2.82, + "grad_norm": 1.1109181642532349, + "learning_rate": 1.7537457124423895e-06, + "loss": 0.4488, + "step": 325 + }, + { + "epoch": 2.83, + "grad_norm": 1.07146418094635, + "learning_rate": 1.5832082068039544e-06, + "loss": 0.4141, + "step": 326 + }, + { + "epoch": 2.84, + "grad_norm": 1.1377061605453491, + "learning_rate": 1.4213258821215381e-06, + "loss": 0.5198, + "step": 327 + }, + { + "epoch": 2.84, + "grad_norm": 1.2658833265304565, + "learning_rate": 
1.268112974998381e-06, + "loss": 0.4737, + "step": 328 + }, + { + "epoch": 2.85, + "grad_norm": 1.0997015237808228, + "learning_rate": 1.1235829596131232e-06, + "loss": 0.3848, + "step": 329 + }, + { + "epoch": 2.86, + "grad_norm": 1.6518305540084839, + "learning_rate": 9.877485465349058e-07, + "loss": 0.482, + "step": 330 + }, + { + "epoch": 2.87, + "grad_norm": 1.037257194519043, + "learning_rate": 8.606216816055334e-07, + "loss": 0.3971, + "step": 331 + }, + { + "epoch": 2.88, + "grad_norm": 1.3644533157348633, + "learning_rate": 7.422135448889033e-07, + "loss": 0.5474, + "step": 332 + }, + { + "epoch": 2.89, + "grad_norm": 1.7670077085494995, + "learning_rate": 6.325345496877688e-07, + "loss": 0.5247, + "step": 333 + }, + { + "epoch": 2.9, + "grad_norm": 1.5219281911849976, + "learning_rate": 5.315943416279834e-07, + "loss": 0.5653, + "step": 334 + }, + { + "epoch": 2.9, + "grad_norm": 0.8556972742080688, + "learning_rate": 4.3940179781019055e-07, + "loss": 0.3258, + "step": 335 + }, + { + "epoch": 2.91, + "grad_norm": 1.2130467891693115, + "learning_rate": 3.5596502602917027e-07, + "loss": 0.3567, + "step": 336 + }, + { + "epoch": 2.92, + "grad_norm": 0.9883585572242737, + "learning_rate": 2.8129136406075394e-07, + "loss": 0.3741, + "step": 337 + }, + { + "epoch": 2.93, + "grad_norm": 1.21916663646698, + "learning_rate": 2.15387379016585e-07, + "loss": 0.4315, + "step": 338 + }, + { + "epoch": 2.94, + "grad_norm": 1.0760103464126587, + "learning_rate": 1.5825886676649192e-07, + "loss": 0.4929, + "step": 339 + }, + { + "epoch": 2.95, + "grad_norm": 0.998428225517273, + "learning_rate": 1.0991085142886271e-07, + "loss": 0.3855, + "step": 340 + }, + { + "epoch": 2.96, + "grad_norm": 1.0843029022216797, + "learning_rate": 7.034758492872052e-08, + "loss": 0.4067, + "step": 341 + }, + { + "epoch": 2.97, + "grad_norm": 1.2102458477020264, + "learning_rate": 3.9572546623856125e-08, + "loss": 0.4745, + "step": 342 + }, + { + "epoch": 2.97, + "grad_norm": 1.0799614191055298, + "learning_rate": 1.7588442998817122e-08, + "loss": 0.4055, + "step": 343 + }, + { + "epoch": 2.98, + "grad_norm": 1.2217665910720825, + "learning_rate": 4.397207426865002e-09, + "loss": 0.3785, + "step": 344 + }, + { + "epoch": 2.99, + "grad_norm": 1.4693666696548462, + "learning_rate": 0.0, + "loss": 0.5273, + "step": 345 + } + ], + "logging_steps": 1, + "max_steps": 345, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 115, + "total_flos": 9.047728554246144e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/out/checkpoint-345/training_args.bin b/out/checkpoint-345/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1bf29f9aeb8d54142b45324e1e89c4134fa2826b --- /dev/null +++ b/out/checkpoint-345/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4783b55cb3ce48e21fc6968b7681826272dbf548a28fbbd59dcf516adf35655f +size 5688 diff --git a/out/config.json b/out/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b99bfdd971ce16f99a39053dabd66ea1d4c78dac --- /dev/null +++ b/out/config.json @@ -0,0 +1,41 @@ +{ + "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + 
"num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "quantization_config": { + "_load_in_4bit": false, + "_load_in_8bit": true, + "bnb_4bit_compute_dtype": "float32", + "bnb_4bit_quant_storage": "uint8", + "bnb_4bit_quant_type": "fp4", + "bnb_4bit_use_double_quant": false, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": false, + "load_in_8bit": true, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/out/merged/config.json b/out/merged/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7df8f1f379ee9bd4035565e5932686955f6fde05 --- /dev/null +++ b/out/merged/config.json @@ -0,0 +1,26 @@ +{ + "_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mistral", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/out/merged/generation_config.json b/out/merged/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..16dd90acbcc482b30661bf1c48c719fec177f4a8 --- /dev/null +++ b/out/merged/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "do_sample": true, + "eos_token_id": 2, + "transformers_version": "4.40.0.dev0" +} diff --git a/out/merged/git-lfs-3.2.0/CHANGELOG.md b/out/merged/git-lfs-3.2.0/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..d941d6544f60e2be15ff346512e669c9dbb69afc --- /dev/null +++ b/out/merged/git-lfs-3.2.0/CHANGELOG.md @@ -0,0 +1,2225 @@ +# Git LFS Changelog + +## 3.2.0 (25 May 2022) + +This release is a feature release which includes support for machine-readable +formats for a couple more commands, plus the ability to automatically merge +LFS-based text files from the command-line. It's likely that the merge driver +will see future improvements, but if you have feedback on the design, please use +the discussions feature. + +Note that our binary release archives no longer unpack into the current +directory, and now contain a top-level directory just like the source archives +do. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @bbodenmiller for fixing the formatting in our manual pages +* @breyed for fixing a typo in our manual pages +* @btoll for improving our README +* @rcoup for fixing our Accept header syntax +* @vtbassmatt for documenting our deprecation of NTLM + +### Features + +* ls-files: add a --json option #5007 (@bk2204) +* Add --json output for git lfs track #5006 (@bk2204) +* Add a merge driver #4970 (@bk2204) +* lfs: don't write hooks when they haven't changed #4935 (@bk2204) +* Tarballs, not tarbombs #4980 (@bk2204) + +### Bugs + +* Apply several Makefile fixes for Windows #5016 (@bk2204) +* git: don't panic on pkt-line without equals #4982 (@bk2204) +* lfshttp: fix invalid Accept header syntax #4996 (@rcoup) +* Grammar fix #4981 (@breyed) +* Use `gitignore`-style path matching for additional commands #4951 (@chrisd8088) +* Avoid pruning when identical files both match and do not match `lfs.fetchexclude` #4973 (@chrisd8088) +* Apply `lfs.fetchexclude` filter to previous commits when pruning #4968 (@chrisd8088) +* Update and correct several error message strings #4943 (@chrisd8088) +* script/upload: correct RHEL 8 package repo #4936 (@bk2204) +* lfs: add old hook content to the list of old hooks #4878 (@bk2204) +* .github/workflows: install packagecloud gem #4873 (@bk2204) + +### Misc + +* Update distros for packagecloud.io #5010 (@bk2204) +* lfshttp: log the Negotiate error on failure #5000 (@bk2204) +* Build CI on Windows 2022 #4997 (@chrisd8088) +* workflows: use ronn-ng #4992 (@bk2204) +* Multiple hash support #4971 (@bk2204) +* note deprecation of NTLM #4987 (@vtbassmatt) +* Update to Go 1.18, drop older Go version support, and update modules and dependencies #4963 (@chrisd8088) +* Update tests to check `prune` command excludes `lfs.fetchexclude` paths #4964 (@chrisd8088) +* Add test to check `prune` command retains tagged unpushed objects #4962 (@chrisd8088) +* Adjust test helpers and tests related to path filtering #4960 (@chrisd8088) +* Include shell path in restricted `PATH` in credential helper path test #4959 (@chrisd8088) +* Build test helper commands with `.exe` file extension on Windows #4954 (@chrisd8088) +* Update Windows signing certificate SHA hash in `Makefile` #4946 (@chrisd8088) +* remove unused `Pipe[Media]Command()` functions #4942 (@chrisd8088) +* Makefile: remove legacy trimpath code #4938 (@bk2204) +* add Inno Setup check of Git install paths and remove old uninstaller checks #4925 (@chrisd8088) +* note `git lfs push --all` only pushes local refs in man page #4898 (@chrisd8088) +* Build man pages into per-section subdirectories #4890 (@chrisd8088) +* Call out destructive command in README #4880 (@btoll) +* Improve formatting #4863 (@bbodenmiller) +* docs/howto: remind core team member to check Actions workflows #4868 (@bk2204) +* .github: fix syntax error in release workflow #4866 (@bk2204) + +## 3.1.4 (19 Apr 2022) + +This release is a bugfix release to fix some problems during the build of +v3.1.3. There are otherwise no substantial changes from v3.1.3. + +### Misc + +* Use only Windows Server 2019 runners for CI in GitHub Actions #4883 (@chrisd8088) +* remove unused `Pipe[Media]Command()` functions #4942 (@chrisd8088) + +## 3.1.3 (19 Apr 2022) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2022-24826. 
+ +On Windows, if Git LFS operates on a malicious repository with a `..exe` file as +well as a file named `git.exe`, and `git.exe` is not found in PATH, the `..exe` +program will be executed, permitting the attacker to execute arbitrary code. +Similarly, if the malicious repository contains files named `..exe` and +`cygpath.exe`, and `cygpath.exe` is not found in PATH, the `..exe` program will +be executed when certain Git LFS commands are run. + +This security problem does not affect Unix systems. This is the same issue as +CVE-2020-27955 and CVE-2021-21237, but the fix for those issues was incomplete +and certain options can still cause the problem to occur. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator, and it +continues to search for programs even when the specified program name is empty. +This has been solved by failing if the path is empty or not found. + +We would like to extend a special thanks to the following open-source +contributors: + +* @yuske for reporting this to us responsibly + +### Bugs + +* Report errors when finding executables and revise PATH search tests (@chrisd8088) + +### Misc + +* Update Windows signing certificate SHA hash in Makefile (@chrisd8088) + +## 3.1.2 (16 Feb 2022) + +This is a bugfix release which fixes a bug in `git lfs install` and some issues +in our CI release processes, including one that prevented arm64 packages for +Debian 11 from being uploaded. + +### Bugs + +* lfs: add old hook content to the list of old hooks #4878 (@bk2204) + +### Misc + +* Revert "Merge pull request #4795 from bk2204/actions-checkout-v2" #4877 (@bk2204) +* .github/workflows: install packagecloud gem #4873 (@bk2204) + +## 3.1.1 (14 Feb 2022) + +This is a bugfix release which fixes a syntax error in the release workflow. + +### Misc + +* .github: fix syntax error in release workflow #4866 (@bk2204) + +## 3.1.0 (14 Feb 2022) + +This release is a feature release which includes support for fallback from +Negotiate to Basic authentication, new ARM64 packages for Debian 11, a new +localization infrastructure, and improved netrc support, in addition to various +bug fixes. In addition, we've addressed a performance regression for `git lfs +migrate import` that was introduced in v3.0.2. + +At the moment, there are no translations available, but if you are interested in +contributing to one, please reach out in an issue. For compatibility with +Windows and to retain the ability to have a single relocatable binary, the +translations are included in the binary at build time.
+ +We would like to extend a special thanks to the following open source +contributors: + +* @donno2048 for improving our error checking +* @howardlyliao for improved netrc support +* @HermannDppes for improving our large file warning on Windows +* @rex4539 for fixing various typos throughout our codebase + +### Features + +* Fall back from Negotiate to Basic #4815 (@bk2204) +* Add basic support for localization #4729 (@bk2204) +* Add support for ARM64 Debian packages #4728 (@bk2204) +* netrc: consider same machine may have different login names #4726 (@howardlyliao) + +### Bugs + +* smudge: honor GIT_LFS_SKIP_SMUDGE with checkout-index #4860 (@bk2204) +* fix `git lfs fsck --objects A..B` handling and drop all left/right ref terminology #4859 (@chrisd8088) +* halt migration when `.gitattributes` symbolic link encountered #4849 (@chrisd8088) +* fix merging of `.gitattributes` with execute file mode during migration #4835 (@chrisd8088) +* Fix migrate import speed regression #4813 (@bk2204) +* Fix typos #4806 (@rex4539) +* Move `err` checking to before the value was used #4776 (@donno2048) +* migrate import: don't allow path filters with --above #4771 (@bk2204) +* avoid panic on checkout with `--to` but no path, and update checkout manual #4766 (@chrisd8088) +* creds: gracefully handle lack of askpass helper #4759 (@bk2204) +* post-checkout: don't modify permissions of untracked files #4760 (@bk2204) +* use gitattributes filepath matching for migrate filter options #4758 (@chrisd8088) +* Avoid errors in git lfs env #4713 (@bk2204) +* fs: specify a file as existing if it's empty #4654 (@bk2204) +* Fix bound for largefilewarning #4633 (@HermannDppes) + +### Misc + +* build missing man pages and correct HTML renderings #4851 (@chrisd8088) +* Update and mark message strings for translation #4846 (@chrisd8088) +* Mark almost all strings for translation #4781 (@bk2204) +* .github/workflows: switch to actions/checkout@v2 #4795 (@bk2204) +* script/packagecloud: update for latest distros #4794 (@bk2204) +* filter-process: don't print large file warning on fixed versions #4768 (@bk2204) +* ssh: avoid using -- where possible #4741 (@bk2204) +* vendor,go.*: update x/crypto and dependencies #4738 (@chrisd8088) +* Stop supporting Go older than 1.13 #4641 (@bk2204) + +## 3.0.2 (28 Oct 2021) + +This release is a bugfix release which fixes a variety of problems seen since +3.0.0, including problems with empty files, `git lfs fsck --pointers`, and +the testsuite. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @fh1ch for patches to make things work better on Alpine Linux +* @pyckle for fixing our handling of filenames in `git lfs migrate import` +* @ycongal-smile for fixing `git lfs migrate import` with similarly named files + +### Bugs + +* Fix two types of misdetection in git lfs fsck #4697 (@bk2204) +* lfs: don't flag non-LFS files as invalid pointers #4691 (@bk2204) +* git: honor GIT_OBJECT_DIRECTORY #4686 (@bk2204) +* migrate: properly escape blob filenames #4683 (@pyckle) +* ls-files: don't process empty files as pointers #4681 (@bk2204) +* Call migrate() BlobFn on every blob #4671 (@ycongal-smile) +* Correct t-lock regular expression to be musl compatible #4673 (@fh1ch) + +### Misc + +* Allow git-lfs-transfer integration tests to be skipped #4677 (@fh1ch) +* Make CI environment GIT prefix grep more specific #4678 (@fh1ch) + +## 3.0.1 (28 Sep 2021) + +This release is a bugfix release which fixes the Windows ARM64 build process and +addresses a regression in support for empty files in pull and fetch. + +We would like to extend a special thanks to the following open-source +contributors: + +* @dennisameling for fixing support for Windows on ARM64 + +### Bugs + +* Fix Windows arm64 release #4647 (@dennisameling) +* fs: specify a file as existing if it's empty #4654 (@bk2204) + +## 3.0.0 (24 Sep 2021) + +This release is a major new release and introduces several new features, such as +a pure SSH-based protocol, packages for several new OS versions, support for +ARM64 Windows, Git-compatible pattern matching, and locking multiple files on +the command line, among other items. + +When connecting over SSH, the first attempt will be made to use +`git-lfs-transfer`, the pure SSH protocol, and if it fails, Git LFS will fall +back to the hybrid protocol using `git-lfs-authenticate`. Note that no major +forges are known to support the pure SSH protocol at this time. + +Because it is a major release, we've made some backwards-incompatible changes. +A (possibly incomplete) list of them is as follows: + +* NTLM support has been completely removed, since nobody volunteered to fix + issues in it. Users are advised to use Kerberos or Basic authentication + instead. +* When using an SSH URL (that is, the syntax starting with `ssh://`), the + leading slash is not stripped off when invoking `git-lfs-authenticate` or + `git-lfs-transfer`. This is compatible with the behavior of Git when invoking + commands over SSH. +* `git lfs fsck` now additionally checks that pointers are canonical and that + files that are supposed to be LFS files actually are. It also exits nonzero + if any problem is found. +* Pattern matching should be stricter and should either match the behavior of + `.gitattributes` or `.gitignore`, as appropriate. Deviations from Git's + behavior will henceforth be treated as bugs and fixed accordingly. +* Git LFS will now write a Git LFS repository format version into the + repository. This is designed to allow future extension with incompatible + changes. Repositories without this version will be assumed to be version 0. + Note that this is different from, but modeled on, Git's repository format + version. +* `git lfs lock` and `git lfs unlock` now handle multiple pathname arguments and + the JSON format has changed to handle multiple responses. +* The Go package name now contains a version number. This should have no effect + on users because we don't provide a stable Go ABI. 
+* Empty components in `PATH` are no longer treated as the current directory on + Windows because unintentionally having such empty components is common and the + behavior was surprising. + +We would like to extend a special thanks to the following open-source +contributors: + +* @codykrieger for ensuring that we process includes correctly +* @corngood for fixing a hang in prune +* @dennisameling for adding support for Windows on ARM64 +* @fh1ch for fixing our 429 handling +* @geki-yaba for fixing problems with askpass on Cygwin +* @gison93 for fixing a bug in our documentation +* @jvimr for ensuring our Debian packages are built properly +* @opohorel for ensuring our copyright notices were up to date +* @rhansen for fixing systems where / is a repository +* @sergiou87 for improving support for cross builds +* @slonopotamus for improving our error handling +* @stanhu for improving our handling of invalid OIDs +* @Timmmm for improving our support of .lfsconfig +* @tklauser for avoiding the need for cgo on macOS + +### Features + +* Advertise hash algorithm supported in batch request #4624 (@bk2204) +* Bump package version to v3 #4611 (@bk2204) +* Update OS versions #4610 (@bk2204) +* Add support for Debian 11 #4592 (@bk2204) +* Support for locking and unlocking multiple files #4604 (@bk2204) +* Add support for Windows ARM64 #4586 (@dennisameling) +* LFS repository format version #4552 (@bk2204) +* Pure SSH-based protocol #4446 (@bk2204) +* Make fsck able to check for invalid pointers #4525 (@bk2204) +* Add --fixup option to migrate info command #4501 (@chrisd8088) +* Allow reporting of LFS pointers separately in migrate info command #4436 (@chrisd8088) +* Add config variables for default remotes #4469 (@bk2204) +* Make lfshttp package builds more portable #4476 (@bk2204) +* Mark skipdownloaderrors as safe #4468 (@Timmmm) +* Make migrate commands default to preserving uncommitted changes #4454 (@chrisd8088) +* Darwin ARM64 support #4437 (@bk2204) +* tools: implement cloneFileSyscall on darwin without cgo #4387 (@tklauser) +* prune: add options to be more aggressive about pruning #4368 (@bk2204) + +### Bugs + +* corrected debian 11 & 12 derived variants #4622 (@jvimr) +* urlconfig: anchor regexp for key matching #4598 (@bk2204) +* filepathfilter: always use Git-compatible pattern matching #4556 (@bk2204) +* debian and rpm: Pass `--skip-repo` to `install` and `uninstall` #4594 (@rhansen) +* Fix hang in prune #4557 (@corngood) +* Disable ANSI color codes while log parsing and anchor diff regular expressions #4585 (@chrisd8088) +* Fix 429 retry-after handling for LFS batch API endpoint #4573 (@fh1ch) +* go.mod: bump gitobj to v2.0.2 #4555 (@bk2204) +* Fix locking with multiple paths and absolute paths #4535 (@bk2204) +* locking: avoid nil pointer dereference with invalid response #4509 (@bk2204) +* migrate import: make --above affect only individual files #4512 (@bk2204) +* fs: be a little less aggressive with cleanup #4490 (@bk2204) +* Fix downloadFile in gitfilter_smudge.go to actually propagate all errors #4478 (@slonopotamus) +* Translate Cygwin path patches for askpass helper and cert dir/file #4473 (@geki-yaba) +* Avoid panic on SIGINT by skipping cleanup when config uninitialized #4463 (@chrisd8088) +* Parse stash log entries parsimonously in prune command #4449 (@chrisd8088) +* docs: note that -I and -X override configuration settings #4442 (@bk2204) +* Make all checks of blobSizeCutoff consistent #4435 (@chrisd8088) +* Fix up handling of the "migrate info" command's --top option #4434 
(@chrisd8088) +* Tighten LFS pointer regexp #4421 (@stanhu) +* invoke git-config with --includes to ensure it always evaluates `include.*` directives #4420 (@codykrieger) +* Canonicalize Windows paths like Git does #4418 (@bk2204) +* lfsapi: don't warn about duplicate but identical aliases #4413 (@bk2204) +* lfs: don't invoke diff drivers when pruning repositories #4407 (@bk2204) +* Consider scheme of request URL, not proxy URL, when choosing proxy #4396 (@bk2204) +* Makefile: allow make release to be run twice in a row #4344 (@bk2204) +* Makefile: don't fail the second time macOS builds are built #4341 (@bk2204) + +### Misc + +* subprocess: don't treat empty PATH component as . on Windows #4603 (@bk2204) +* Switch from which to command -v #4591 (@bk2204) +* Bump Go to 1.17 #4584 (@dennisameling) +* Add cautions about unstable Go API and fix GPG key link #4582 (@chrisd8088) +* Update go.mod module path with explicit v2 #4575 (@chrisd8088) +* Drop unused ClearTempStorage() transfer adapter method and tune stale comments #4554 (@chrisd8088) +* README: improve steps for building from source #4527 (@bk2204) +* Update license year #4513 (@opohorel) +* docs/man: add note re post-import use of checkout #4504 (@chrisd8088) +* Bump transitive dependencies #4502 (@bk2204) +* script/packagecloud: update distros #4494 (@bk2204) +* Use host architecture and OS when running `go generate` #4492 (@sergiou87) +* Bump go-spnego to the latest version #4482 (@bk2204) +* Update git-lfs-migrate man page and add description section #4458 (@chrisd8088) +* update x/text and dependencies #4455 (@opohorel) +* Use blobSizeCutoff in clean pointer buffer length check #4433 (@chrisd8088) +* tools: unset XDG_CONFIG_HOME for filetools test #4432 (@chrisd8088) +* vendor,go.{mod,sum}: update x/net and dependencies #4398 (@chrisd8088) +* Remove NTLM #4384 (@bk2204) +* gitobj 2.0.1 #4348 (@bk2204) +* Fix numbered list in git lfs examples #4347 (@gison93) +* Add test for download gzip transport compression #4345 (@bk2204) + +## 2.13.3 (26 Mar 2021) + +This release fixes two bugs that caused `git lfs prune` to hang, updates some +dependencies to versions which lack a security issue (which did not affect Git +LFS), and adds support for ARM64 builds on macOS. + +### Bugs + +* lfs: don't invoke diff drivers when pruning repositories #4407 (@bk2204) +* Parse stash log entries parsimonously in prune command #4449 (@chrisd8088) + +### Misc + +* Darwin ARM64 support #4437 (@bk2204) +* vendor,go.{mod,sum}: update x/net and dependencies #4398 (@chrisd8088) + +## 2.13.2 (13 Jan 2021) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2021-21237. + +On Windows, if Git LFS operates on a malicious repository with a git.bat or +git.exe file in the current directory, that program is executed, permitting the +attacker to execute arbitrary code. This security problem does not affect Unix +systems. This is the same issue as CVE-2020-27955, but the fix for that issue +was incomplete and certain options can still cause the problem to occur. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator. This has +been solved by always using PATH to pre-resolve paths before handing them to Go. 
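The preceding paragraph describes the mitigation only in prose. As an illustrative aside (not Git LFS's actual code), a minimal Go sketch of the same idea, under the assumption that a bare program name is resolved through `PATH` up front, an empty name is rejected, and a match that comes from the current directory rather than a `PATH` entry is refused, might look like this:

```go
// Sketch only: pre-resolve a program name before handing it to os/exec,
// so a git.exe dropped into the current directory is never picked up.
package main

import (
	"errors"
	"fmt"
	"os/exec"
	"path/filepath"
	"strings"
)

func resolveProgram(name string) (string, error) {
	if name == "" {
		return "", errors.New("empty program name")
	}
	// Names that already contain a directory separator are used as given.
	if strings.ContainsAny(name, `/\`) {
		return name, nil
	}
	path, err := exec.LookPath(name)
	if err != nil {
		return "", fmt.Errorf("%q not found in PATH: %w", name, err)
	}
	// A relative result means the match came from the current directory
	// rather than a PATH entry; refuse to run it.
	if !filepath.IsAbs(path) {
		return "", fmt.Errorf("refusing to run %q from the current directory", path)
	}
	return path, nil
}

func main() {
	p, err := resolveProgram("git")
	if err != nil {
		fmt.Println("error:", err)
		return
	}
	fmt.Println("would run:", exec.Command(p, "--version").Path)
}
```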
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @Ry0taK for reporting this to us responsibly + +### Bugs + +* Use subprocess for invoking all commands (@bk2204) + +## 2.13.1 (11 Dec 2020) + +This release fixes a bug in our build tooling that prevents our release process +from working properly. This release is otherwise identical to 2.13.0. + +### Misc + +* Makefile: don't fail the second time macOS builds are built #4341 (@bk2204) + +## 2.13.0 (10 Dec 2020) + +This release introduces several new features, such as the `--above` option to +`git lfs migrate import` and support for `socks5h` proxies. In addition, many +bugs have been fixed and several miscellaneous fixes have been included. + +Unless someone steps up to fix and maintain NTLM support, this will be the last +Git LFS release to support NTLM. See #4247 for more details. Note that Git LFS +supports Kerberos as well, which is far more secure and may be a viable +replacement in many situations. + +We would like to extend a special thanks to the following open-source +contributors: + +* @EliRibble for adding support for the `--above` option to `git lfs migrate import` +* @andrewshadura for adding support for the `GIT_LFS_SKIP_PUSH` environment variable +* @sinbad for fixing problems with retaining objects used by stashes +* @tklauser for cleaning up our use of error constants in the code + +### Features + +* Add --above parameter to 'migrate import'. #4276 (@EliRibble) +* Add GIT_LFS_SKIP_PUSH to allow skipping the pre-push hook #4202 (@andrewshadura) +* lfshttp: add support for socks5h proxies #4259 (@bk2204) +* Add manual pages to release assets #4230 (@bk2204) +* Honor GIT_WORK_TREE #4269 (@bk2204) + +### Bugs + +* Make git lfs migrate import handle missing extensions #4318 (@bk2204) +* fs: don't panic when using a too-short object ID to push #4307 (@bk2204) +* Fix pattern matching for .gitattributes #4301 (@bk2204) +* config: map missing port to default for HTTP key lookups #4282 (@bk2204) +* tools: use IoctlFileClone from golang.org/x/sys/unix #4261 (@tklauser) +* tools/util_darwin.go: Remove use of direct syscalls #4251 (@stanhu) +* tools: always force a UTF-8 locale for cygpath #4231 (@bk2204) +* prune: fix deleting objects referred to by stashes #4209 (@sinbad) + +### Misc + +* migrate import: warn about refs on case insensitive file systems #4332 (@larsxschneider) +* Drop obsolete OS support #4328 (@bk2204) +* tools: use ERROR_SHARING_VIOLATION const from golang.org/x/sys/windows #4291 (@tklauser) +* pull: gracefully handle merge conflicts #4289 (@bk2204) +* script/upload: avoid using Ruby's URI.escape #4266 (@bk2204) +* add documentation of security bug report process #4244 (@chrisd8088) + +## 2.12.1 (4 Nov 2020) + +This release introduces a security fix for Windows systems, which has been +assigned CVE-2020-27955. + +On Windows, if Git LFS operates on a malicious repository with a git.bat or +git.exe file in the current directory, that program is executed, permitting the +attacker to execute arbitrary code. This security problem does not affect Unix +systems. + +This occurs because on Windows, Go includes (and prefers) the current directory +when the name of a command run does not contain a directory separator. This has +been solved by always using PATH to pre-resolve paths before handing them to Go. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @dawidgolunski for reporting this to us responsibly + +### Bugs + +* subprocess: avoid using relative program names (@bk2204) + +## 2.12.0 (1 Sep 2020) + +This release introduces several new features, such as support for the SHA-256 +repositories coming in a future version of Git, restored support for Go 1.11, +the ability to read the contents of .lfsconfig from the repository, signed and +notarized binaries on macOS, and pre-built 32-bit ARM binaries on Linux. In +addition, several bugs have been fixed and miscellaneous fixes included. + +Note that macOS releases are now shipped as zip files, not tarballs, since it is +not possible to notarize tarballs. macOS releases are now also built on macOS, +so `git lfs dedup` should now function. + +We would like to extend a special thanks to the following open-source +contributors: + +* @saracen for adding support for ARM binaries +* @mversluys for improving locking support +* @cccfeng for updating our documentation to make it more readable +* @bluekeyes for improving performance and tracing +* @gertcuykens for adding missing parts of our documentation + +### Features + +* config: optionally read .lfsconfig from the repository #4200 (@bk2204) +* Support SHA-256 repositories #4186 (@bk2204) +* allow Go 1.11 builds by using WaitStatus.ExitStatus() #4183 (@chrisd8088) +* add --worktree option to install and uninstall commands #4159 (@chrisd8088) +* Sign and notarize binaries on macOS #4143 (@bk2204) +* Makefile: add linux arm build and release targets #4126 (@saracen) +* Allow locking and unlocking non-existent files #3992 (@mversluys) + +### Bugs + +* docs/api/locking: add an explicit
#4208 (@cccfeng) +* Fix hang when the user lacks permissions #4207 (@bk2204) +* Don't mark unlocked files that aren't lockable as read-only #4171 (@bk2204) +* locking: make patterns with slashes work on Windows #4139 (@bk2204) +* git: consider full refspec when determining seen refs #4133 (@bk2204) + +### Misc + +* Fix Windows CI #4199 (@bk2204) +* Fix testsuite when working with non-master default branch #4174 (@bk2204) +* git: improve performance of remote ref listing #4176 (@bluekeyes) +* subprocess: trace all command execution #4175 (@bluekeyes) +* Update git-lfs-migrate.1.ronn #3869 (@gertcuykens) +* t: use repo v1 with extensions #4177 (@bk2204) +* Makefile: ensure temp Go modules can be deleted #4157 (@chrisd8088) +* Improve test suite robustness via environment #4132 (@bk2204) + +## 2.11.0 (8 May 2020) + +This release introduces several new features, such as better support for unnamed +local paths and URLs as remotes, support for `submodule.recurse`, exponential +backoff on failure, and support for renegotiation. In addition, numerous bugs +have been fixed and miscellaneous issues have been addressed. + +We would like to extend a special thanks to the following open-source +contributors: + +* @bluekeyes for adding support for exponential backoff +* @pluehne for adding support for `submodule.recurse` +* @Electric26 for fixing the default behavior of a prompt +* @nataliechen1 for fixing certain upload retry failures +* @shalashik for fixing a panic during cherry-pick +* @swisspol for updating our documentation to reflect supported `.lfsconfig` + keys +* @dan2468 for updating the copyright year + +### Features + +* Allow literal local paths as remotes #4119 (@bk2204) +* pre-push: find named remote for URL if possible #4103 (@bk2204) +* tq: add exponential backoff for retries #4097 (@bluekeyes) +* migrate import: set text to unspecified for excluded fields #4068 (@bk2204) +* Update list of distros for packagecloud.io #4080 (@bk2204) +* lfshttp: allow renegotiation #4066 (@bk2204) +* Support submodule.recurse = true #4063 (@pluehne) +* add man page for the post-commit hook command #4052 (@chrisd8088) +* Add an option to control warning about files larger than 4 GiB #4009 (@bk2204) + +### Bugs + +* commands/command_migrate.go: fix bug #4116 (@Electric26) +* git: avoid "bad object" messages when force-pushing #4102 (@bk2204) +* git: avoid trying to rewrite remote tags as remote branches #4096 (@bk2204) +* make Go tests run consistently using local binary #4084 (@chrisd8088) +* commands: don't honor lfs.fetch* for ls-files #4083 (@bk2204) +* commands: print help output with --help #4059 (@bk2204) +* fail dedup command with explanation when LFS extensions configured #4045 (@chrisd8088) +* fix upload retry 'file already closed' issue' #4042 (@nataliechen1) +* commands/command_filter_process: cherry-pick of several commits cause panic error #4017 (@shalashik) +* Check error when creating local storage directory #4016 (@bk2204) +* track: detect duplicate patterns with --filename #4000 (@bk2204) + +### Misc + +* Removed lfs.extension.* from list of supported keys for .lfsconfig #4044 (@swisspol) +* Tidy modules #4035 (@bk2204) +* README: explain how to verify releases #4022 (@bk2204) +* docs: document git lfs migrate --yes #4023 (@bk2204) +* Stop using cgo on amd64 Linux #4026 (@bk2204) +* updated copyright year #3995 (@dan2468) + +## 2.10.0 (21 January 2020) + +This release introduces several new features, such as support for local paths in +remotes, Kerberos support, and official binaries for 
S390x and little-endian +64-bit PowerPC systems. In addition, numerous bugs have been fixed and +miscellaneous issues have been addressed. + +We would like to extend a special thanks to the following open-source +contributors: + +* @ganadist for fixing a bug in the output of `git lfs env` +* @exceed-alae for fixing a possible nil pointer dereference +* @slonopotamus for improvements to Windows support and code cleanups +* @nataliechen1 for fixing a data race +* @ssgelm for writing and updating the code to use a new cookie jar parser +* @austintraver for improving the output of `git lfs status` +* @nikola-sh for improving option parity with Git +* @alrs for fixing several error checks in the testsuite +* @pluehne for improving our support for uncommon references + +### Features + +* Optimize pushes for multiple refs #3978 (@bk2204) +* Include ppc64le and s390x Linux builds in releases #3983 (@bk2204) +* Kerberos (SPNEGO) support for HTTP #3941 (@bk2204) +* Add support for local paths #3918 (@bk2204) +* Allow specifying HTTP version to use #3887 (@bk2204) + +### Bugs + +* t-duplicate-oids: use correct awk indexing #3981 (@bk2204) +* Improved proxy support #3972 (@bk2204) +* install: don't print error if run outside repository #3969 (@bk2204) +* debian: bump version of golang-go #3959 (@bk2204) +* lfshttp: Set valid default value for lfs.concurrenttransfers #3949 (@ganadist) +* Add nil-check on defer block of DoTransfer() #3936 (@exceed-alae) +* Retry batch failures #3930 (@bk2204) +* rpm: use old setup code on CentOS 7 #3938 (@bk2204) +* Interpret relative hook paths as relative to working tree #3926 (@bk2204) +* Handle missing cygpath gracefully #3910 (@bk2204) +* Update index before showing status #3921 (@bk2204) +* Honor lfs.url when deciding on transfer adapters #3905 (@bk2204) +* Implement retry logic to fix LFS storage race conditions on Windows #3890 (@slonopotamus) +* Avoid hang when using git hash-object --stdin --path #3902 (@bk2204) +* synchronize access to netrcCredentialHelper.skip #3896 (@nataliechen1) + +### Misc + +* Improve license files #3973 (@bk2204) +* Add CI link to CI badge in README #3960 (@slonopotamus) +* Clarify output shown by `git lfs status` #3953 (@austintraver) +* Revert "ci: force Windows Git version to 2.22.0" #3903 (@bk2204) +* Better document pointer format constraints #3944 (@bk2204) +* Don't abort with newer Git when in a bare repo #3940 (@bk2204) +* Fix more Linux package issues #3932 (@bk2204) +* docs: explain shell metacharacters better #3920 (@bk2204) +* Reset the text attribute on export #3913 (@bk2204) +* Support schannel ssl backend #3868 (@nikola-sh) +* Allow migrate export to handle non-pointer files gracefully #3911 (@bk2204) +* git/gitattr: fix dropped test errors #3904 (@alrs) +* Accept all local references with git lfs push #3876 (@pluehne) +* Drop pre-1.6 Go compatibility code #3897 (@slonopotamus) +* tools/kv: Fix dropped test error #3882 (@alrs) +* Use different parser for cookiejar files #3886 (@ssgelm) +* Stop replacing files in LFS storage when downloading them concurrently on Windows #3880 (@slonopotamus) +* Fix error strings to follow Go guidelines #3871 (@slonopotamus) +* Miscellaneous release fixes #3866 (@bk2204) + +## 2.9.2 (12 December 2019) + +This release fixes a few regressions, such as a possible nil pointer +dereference, a failure to retry batch requests, and a bug where repositories +could fail to be detected on Windows. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @exceed-alae for fixing a possible nil pointer dereference + +### Bugs + +* Add nil-check on defer block of DoTransfer() #3936 (@exceed-alae) +* Retry batch failures #3930 (@bk2204) +* rpm: use old setup code on CentOS 7 #3938 (@bk2204) +* Handle missing cygpath gracefully #3910 (@bk2204) + +### Misc + +* Don't abort with newer Git when in a bare repo #3940 (@bk2204) +* Fix more Linux package issues #3932 (@bk2204) + +## 2.9.1 (25 November 2019) + +This release fixes a few regressions, such as the ability to use HTTP/1.1 when +required, addresses a race condition, and switches the cookie jar parser to +something that's easier for distributions to package. + +We would like to extend a special thanks to the following open-source +contributors: + +* @nataliechen1 for fixing a data race +* @ssgelm for writing and updating the code to use a new cookie jar parser + +### Features + +* Allow specifying HTTP version to use #3887 (@bk2204) + +### Bugs + +* synchronize access to netrcCredentialHelper.skip #3896 (@nataliechen1) +* Fix several causes of CI problems #3878 (@bk2204) +* Miscellaneous release fixes #3866 (@bk2204) + +### Misc + +* Build artifacts during CI for easier testing #3892 (@bk2204) +* Use different parser for cookiejar files #3886 (@ssgelm) + +## 2.9.0 (17 October 2019) + +This release adds support for DragonFly BSD, adds a new `git lfs dedup` command +to save space if the file system supports it, adds support for file URLs, +improves the performance when walking the repository, contains improvements +to use HTTP/2 when available and cookies when required, and numerous other bug +fixes, features, and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @pluehne for adding support for fetching the history of specific refs +* @kupson for adding cookie support +* @liweitianux for adding Dragonfly BSD support +* @kazuki-ma for implementing de-duplication support +* @dvdveer for adding range support to ls-files +* @dyrone, @pmeerw, @yamiacat, and @kittenking for cleaning up some documentation issues +* @slonopotamus for improving concurrent downloads +* @nataliechen1 for fixing remote names with dots +* @jw3 for removing excessive logging +* @SeamusConnor for significantly improving performance when walking the repository + +### Features + +* Support fetching entire history of specific refs #3849 (@pluehne) +* Add support for CentOS 8 #3854 (@bk2204) +* Let git-lfs HTTPS transport send cookies #3825 (@kupson) +* Support DragonFly BSD #3837 (@liweitianux) +* HTTP/2 protocol support #3793 (@PastelMobileSuit) +* Add clonefile on Windows over ReFS support. #3790 (@kazuki-ma) +* Add new command `git lfs dedup` for file system level de-duplication. #3753 (@kazuki-ma) +* Support GIT_ALTERNATE_OBJECT_DIRECTORIES #3765 (@bk2204) +* ls-files: add support for reference range #3764 (@dvdveer) +* Add several additional distros for packagecloud.io #3751 (@bk2204) +* Provide an option to track to handle paths literally #3756 (@bk2204) +* Optimize traversal of Git objects with URL remotes #3755 (@bk2204) +* Support for file URLs #3748 (@bk2204) +* Add clone file on MacOS X (darwin). 
#3745 (@kazuki-ma) + +### Bugs + +* Fix JSON comma problems in docs #3851 (@dyrone) +* Remove redundant comma in batch.md #3841 (@dyrone) +* More robust handling of parallel attempts to download the same file #3826 (@slonopotamus) +* Update wildmatch to v1.0.4 #3820 (@bk2204) +* Update to gitobj v1.4.1 #3815 (@bk2204) +* Fix build error when cross-compiling #3817 (@bk2204) +* Do not fail when multiple processes download the same lfs file #3813 (@slonopotamus) +* Fix Remote Name Parsing Bug #3812 (@nataliechen1) +* status: gracefully handle files replaced by directories #3768 (@bk2204) +* Avoid deadlock when transfer queue fails #3800 (@bk2204) +* Avoid a hang when Git is slow to provide us data #3806 (@bk2204) +* tasklog/log.go: print "done" messages with a trailing period #3789 (@ttaylorr) +* track: make --filename work with spaces #3785 (@bk2204) +* Fix couple of 'the the' typos #3786 (@pmeerw) +* Use an absolute path for smudging #3780 (@bk2204) +* Fix URL parsing with Go 1.12.8 #3771 (@bk2204) +* Fix remote autoselection when not on a branch #3759 (@bk2204) +* Replace deprecated SEEK_SET, SEEK_CUR usage. #3739 (@kazuki-ma) +* Do not log skipped checkouts to file #3736 (@jw3) +* Fix typos across git-lfs repository #3728 (@kittenking) +* Accept legacy Git SSH URLs #3713 (@bk2204) + +### Misc + +* ls-files --all man patch #3859 (@yamiacat) +* Reworked to use git ls-files in some circumstances instead of FastWalkGitRepo #3823 (@SeamusConnor) +* Clean up go.mod for Go 1.13 #3807 (@bk2204) +* Use FICLONE instead of BTRFS_IOC_CLONE. #3796 (@kazuki-ma) +* Remove unused pty code #3737 (@bk2204) + +## 2.8.0 (23 July 2019) + +This release adds support for SOCKS proxies and Windows junctions, adds native +packages for Debian 10 and similar distributions, reduces the number of +situations in which running `git lfs fetch --all` is required, improves +compatibility with Cygwin, and numerous other bug fixes and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @mstrap for adding support for listing lock owners +* @h-hirokawa for adding support for rewriting object URLs +* @slonopotamus for helping get our specs and implementation in sync +* @ssgelm for improving our Debian packaging +* @TBK for fixing a test +* @hartzell for improving the compatibility of our Makefile +* @AJH16 for implementing support for NTLM SSO + +### Features + +* Don't fail if we lack objects the server has #3634 (@bk2204) +* Add support for Debian 10 #3697 (@bk2204) +* Migrate tags pointing to other tags #3690 (@bk2204) +* Add support for SOCKS proxies #3677 (@bk2204) +* Allow vendors to customize the version info #3636 (@bk2204) +* Wrap credential data to allow late credential prompting and update NTLM/SSPI to attempt SSPI login prior to prompting for credentials. 
#3617 (@AJH16) +* git-lfs locks should optionally denote own locks #3569 (@mstrap) +* tq/adapterbase: support rewriting href #3590 (@h-hirokawa) +* Handle Windows junctions properly #3560 (@bk2204) +* Allow specifying multiple insteadOf aliases #3550 (@bk2204) + +### Bugs + +* Make API documentation lock creation example less confusing #3648 (@bk2204) +* Use a download token when searching locks #3715 (@bk2204) +* Copy mode from original file when rewriting objects #3694 (@bk2204) +* Don't recurse into submodules when walking repository #3686 (@bk2204) +* Be more precise about what timestamps we accept #3680 (@bk2204) +* Canonicalize common directory paths on Cygwin #3671 (@bk2204) +* Ensure we always use correct Content-Type and Accept headers #3663 (@bk2204) +* Fix 'owner' lock field not documented as optional #3651 (@slonopotamus) +* Improve error handling in git lfs install #3624 (@bk2204) +* Properly handle config options for URLs with upper case letters #3584 (@bk2204) +* Detect Cygwin-style pipe TTYs as TTYs #3582 (@bk2204) +* Set push remote for pre-push #3579 (@bk2204) +* Switch from manually running go generate to using dh-golang to run it #3549 (@ssgelm) +* Install worktree hooks in the proper directory #3547 (@bk2204) +* Avoid nil pointer dereference on download failure #3537 (@bk2204) +* Avoid nil pointer dereference on unexpected failure #3534 (@bk2204) + +### Misc + +* Update gitobj to v1.3.1 #3716 (@bk2204) +* Use default line endings for core.autocrlf=input #3712 (@bk2204) +* Fix CircleCI #3710 (@bk2204) +* Vendor in gitobj v1.3.0 #3685 (@bk2204) +* Update CONTRIBUTING #3673 (@bk2204) +* Fix typo in t-locks.sh #3666 (@TBK) +* Make 'locked_at' required in JSON schema #3655 (@slonopotamus) +* Document a new batch error code #3639 (@bk2204) +* Fix invalid JSON in LFS locking API documentation #3650 (@slonopotamus) +* Fix invalid JSON in locking protocol docs #3644 (@slonopotamus) +* Various release updates #3623 (@bk2204) +* tq/adapterbase: fix typo enableHrefRerite to enableHrefRewrite #3594 (@h-hirokawa) +* Use git-lfs version of go-ntlm #3588 (@bk2204) +* Log test server standard error to log file #3577 (@bk2204) +* Don't set -extldflags unless LDFLAGS has a value #3545 (@hartzell) +* Retry on oversize file #3518 (@bk2204) +* Fix asset uploading during releases #3538 (@bk2204) + +## 2.7.0 (15 February 2019) + +This release adds better support for large files on 32-bit systems, adds +attribute macros, fixes several file descriptor leaks, improves compatibility +with Git's configuration parsing, and includes numerous other bug fixes and +modifications. 
+ +We would like to extend a special thanks to the following open-source +contributors: + +* @andyneff and @torbjoernk for updating our release targets +* @zkry for work on rate-limiting +* @Foxboron for work on reproducible builds +* @mstrap for adding a release target for Linux arm64 +* @keiko713, @Erwyn, and @mloskot for improving our documentation +* @QuLogic for fixing our tests under SELinux +* @saracen and @steffengodskesen for improving our output handling +* @mbsulliv for finding and fixing a bug where we ran out of file descriptors + +### Features + +* Add sles 15 support #1055 #3515 (@andyneff) +* docs/man/git-lfs-config.5.ronn: document GIT_LFS_SKIP_SMUDGE #3509 (@ttaylorr) +* commands/command_pointer.go: introduce `--check` option #3501 (@ttaylorr) +* Makefile additions for reproducible builds and asmflags #3444 (@Foxboron) +* locking: add flag to control modification of ignored files #3409 (@bk2204) +* build package for Ubuntu 18.10 aka Cosmic #3402 (@torbjoernk) +* Add support for retries with delays (ex. rate limiting) #3449 (@zkry) +* Trim embedded paths out of binaries #3443 (@bk2204) +* Ensure 32-bit Git LFS binaries can handle files larger than 4 GiB #3426 (@bk2204) +* Support attribute macros #3391 (@bk2204) +* tasklog: don't log progress status when stdout is not a tty #3349 (@steffengodskesen) +* locking: cache JSON response from server #3253 (@mstrap) +* tq: enable transfer debugging when GIT_CURL_VERBOSE is set #3341 (@bk2204) + +### Bugs + +* .circleci: don't use 'brew prune' #3514 (@ttaylorr) +* t/t-smudge.sh: remove unnecessary test #3513 (@ttaylorr) +* docs/man: fix inconsistency in 'git-lfs-ls-files(1)' #3496 (@ttaylorr) +* lfshttp: close body on redirect #3479 (@bk2204) +* status: handle deleted files gracefully #3482 (@bk2204) +* Fix hang in prune with too few file descriptors #3460 (@bk2204) +* Fix parameter name on List Locks API Documentation #3477 (@Erwyn) +* TST: Trim security context when checking permissions. 
#3476 (@QuLogic) +* command/env: ensure we honor lfs.url #3470 (@bk2204) +* Fix swapped case sensitivity in patterns #3433 (@bk2204) +* core.sharedRepository improvements for directories #3417 (@bk2204) +* Update the doc of whitelisted .lfsconfig keys #3423 (@keiko713) +* Rewrite URL config-matching #3392 (@PastelMobileSuit) +* git: close blob objects when finished #3379 (@bk2204) +* Avoid hang in repos cloned with --shared or --reference #3383 (@bk2204) +* commands/command_status.go: require a working copy #3378 (@ttaylorr) +* Fix test server API #3377 (@bk2204) +* vendor: don't remove necessary dependencies #3356 (@ttaylorr) +* filepathfilter: don't say file is both accepted and rejected #3360 (@bk2204) +* Support pushInsteadOf aliases when determining endpoints #3353 (@bk2204) +* Close attributes file #3347 (@mbsulliv) +* Fix humanize's FormatByteRate() to work with 0s duration #3340 (@saracen) + +### Misc + +* Release automation #3510 (@bk2204) +* docs/man: update `git-lfs-fetch(1)` manpage #3488 (@ttaylorr) +* Update Cobra #3483 (@bk2204) +* Run go generate only on Windows #3480 (@bk2204) +* docs/man/git-lfs-migrate: make examples less confusing #3424 (@bk2204) +* Modify logic of 'migrate info' to process extensionless files #3458 (@zkry) +* Improve error message on missing object #3398 (@bk2204) +* docs/man: suggest using Git configuration for LFS keys #3394 (@bk2204) +* Document default value of migrate info --top= #3387 (@mloskot) +* Clarify minimum git version #3327 (@carlwgeorge) + +## 2.6.1 (3 December 2018) + +This release contains miscellaneous bug fixes since v2.6.0. Most notably, +release v2.6.1 restores support for alternate repositories, which was +accidentally broken in v2.6.0. + +### Bugs + +* git: close blob objects when finished #3379 (@bk2204) +* Avoid hang in repos cloned with --shared or --reference #3383 (@bk2204) +* vendor: don't remove necessary dependencies #3356 (@ttaylorr) + +## 2.6.0 (1 November, 2018) + +This release adds better support for redirecting network calls from a Git LFS +API server to one that requires a different authentication mode, builds Git LFS +on Go 1.11, and numerous other bug fixes and modifications. + +We would like to extend a special thanks to the following open-source +contributors: + +* @andyneff for updating our release targets +* @gtsiolis: for removing the deprecated `git lfs clone` from the listing of + supported Git LFS commands +* @jsantell for fixing a formatting issue in the INCLUDE AND EXCLUDE man page + section +* @mmlb for adding a release target for Linux arm64 +* @skashyap7 for adding the 'git lfs track -n' +* @Villemoes: for modernizing the Git LFS installation procedure on Debian. 
+ +### Features + +* commands: list explicitly excluded patterns separately #3320 (@bk2204) +* Uninstall improvements #3326 (@bk2204) +* config: honor GIT_AUTHOR_DATE and GIT_COMMITTER_DATE #3314 (@bk2204) +* Add new `.netrc` credential helper #3307 (@PastelMobileSuit) +* Honor umask and core.sharedRepository #3304 (@bk2204) +* Support listing only filename tracked by git lfs using --name (-n) option + #3271 (@skashyap7) +* all: use Go 1.11.1 in CI #3298 (@ttaylorr) +* lfsapi/tq: Have DoWithAuth() caller determine URL Access Mode #3293 + (@PastelMobileSuit) +* commands: undeprecate checkout #3303 (@bk2204) +* Checkout options for conflicts #3296 (@bk2204) +* Makefile: build source tarballs for release #3283 (@bk2204) +* Encrypted SSL key support #3270 (@bk2204) +* Add support for core.sshCommand #3235 (@bk2204) +* gitobj-based Object Scanner #3236 (@bk2204) +* README.md: new core team members #3217 (@ttaylorr) +* Add build and releases for linux arm64 #3196 (@mmlb) +* Update packagecloud.rb #3210 (@andyneff) +* all: use Go modules instead of Glide #3208 (@ttaylorr) +* all: use Go 1.11 in CI #3203 (@ttaylorr) + +### Bugs + +* Fix formatting of INCLUDE AND EXCLUDE (REFS) #3330 (@jsantell) +* go.sum: add missing entries #3319 (@bk2204) +* Ensure correct syntax for commit headers in lfs migrate import #3313 (@bk2204) +* Clean up trailing whitespace #3299 (@bk2204) +* commands: unambiguously resolve remote references #3285 (@ttaylorr) +* Expand custom transfer args by using the shell #3259 (@bk2204) +* Canonicalize paths properly on Windows #3277 (@bk2204) +* debian/prerm: add --system flag #3272 (@Villemoes) +* t: make testsuite run under git rebase -x #3262 (@bk2204) +* git/gitattr: parse 'set' attributes #3255 (@ttaylorr) +* t: avoid panic in lfstest-customadapter #3243 (@bk2204) +* t: avoid using shell variables in printf's first argument #3242 (@bk2204) +* lfsapi: handle SSH hostnames and aliases without users #3230 (@bk2204) +* commands/command_ls_files.go: ignore index with argument #3219 (@ttaylorr) +* commands/command_migrate_import.go: install hooks #3227 (@ttaylorr) +* t: mark test sources as .PHONY #3228 (@ttaylorr) +* Pass GIT_SSH_COMMAND to the shell #3199 (@bk2204) +* Tidy misformatted files #3202 (@bk2204) +* config: expand core.hooksPath #3212 (@ttaylorr) +* locks: manage write permissions of ignored files #3190 (@ttaylorr) + +### Misc + +* CONTRIBUTING.md: :nail_care: #3325 (@ttaylorr) +* Update CONTRIBUTING #3317 (@bk2204) +* go.mod: depend on tagged gitobj #3311 (@ttaylorr) +* RFC: SSH protocol #3290 (@bk2204) +* Remove `git lfs clone` command from man #3301 (@gtsiolis) +* ROADMAP.md: use GitHub issues instead #3286 (@ttaylorr) +* docs: add note about closing release milestone #3274 (@bk2204) +* CI improvements #3268 (@bk2204) +* docs/howto: document our release process #3261 (@ttaylorr) +* Create new lfshttp package #3244 (@PastelMobileSuit) +* CONTRIBUTING: update required go version #3232 (@PastelMobileSuit) +* go.mod: use latest github.com/olekukonko/ts #3223 (@ttaylorr) +* go.mod: pin github.com/git-lfs/wildmatch to v1.0.0 #3218 (@ttaylorr) +* Update README.md #3193 (@srl295) + +## 2.5.2 (17 September, 2018) + +### Bugs + +* config: Treat [host:port]:path URLs correctly #3226 (@saschpe) +* tq: Always provide a Content-Type when uploading files #3201 (@bk2204) +* commands/track: Properly `lfs track` files with escaped characters in their + name #3192 (@leonid-s-usov) + +### Misc + +* packagecloud.rb: remove older versions #3210 (@andyneff) + +## 2.5.1 (2 August, 2018) + 
+This release contains miscellaneous bug fixes since v2.5.0. Most notably, +release v2.5.1 allows a user to disable automatic Content-Type detection +(released in v2.5.0) via `git config lfs.contenttype false` for hosts that do +not support it. + +### Features + +* tq: make Content-Type detection disable-able #3163 (@ttaylorr) + +### Bugs + +* Makefile: add explicit rule for commands/mancontent_gen.go #3160 (@jj1bdx) +* script/install.sh: mark as executable #3155 (@ttaylorr) +* config: add origin to remote list #3152 (@PastelMobileSuit) + +### Misc + +* docs/man/mangen.go: don't show non-fatal output without --verbose #3168 + (@ttaylorr) +* LICENSE.md: update copyright year #3156 (@IMJ355) +* Makefile: silence some output #3164 (@ttaylorr) +* Makefile: list prerequisites for resource.syso #3153 (@ttaylorr) + +## 2.5.0 (26 July, 2018) + +This release adds three new migration modes, updated developer ergonomics, and +a handful of bug fixes to Git LFS. + +We would like to extend a special thanks to the following open-source +contributors: + +* @calavera for fixing a broken Go test and adding support for custom + Content-Type headers in #3137 and #3138. +* @cbuehlmann for adding support for encoded character names in filepaths via + #3093. +* @larsxschneider for changing the default value of lfs.allowincompletepush in + #3109. +* @NoEffex for supporting TTL in SSH-based authentication tokens via #2867. +* @ssgelm for adding 'go generate' to our Debian packages via #3083. + +### Features + +* Makefile: replace many scripts with make targets #3144 (@ttaylorr) +* {.travis,appveyor}.yml: upgrade to Go 1.10.3 #3146 (@ttaylorr) +* t: run tests using prove #3125 (@ttaylorr) +* commands/migrate: infer wildmatches with --fixup #3114 (@ttaylorr) +* Retry SSH resolution 5 times #2934 (@stanhu) +* Implement `migrate export` subcommand #3084 (@PastelMobileSuit) +* Add `--no-rewrite` flag to `migrate import` command #3029 (@PastelMobileSuit) + +### Bugs + +* t: fix contains_same_elements() fn #3145 (@PastelMobileSuit) +* commands: warn if working copy is dirty #3124 (@ttaylorr) +* Ensure provided remote takes precedence over configured pushRemote #3139 (@PastelMobileSuit) +* Fix proxy unit tests. #3138 (@calavera) +* commands/command_migrate.go: loosen meaning of '--everything' #3121 (@ttaylorr) +* lfsapi: don't query askpass for given creds #3126 (@PastelMobileSuit) +* config/git_fetcher.go: mark 'lfs.allowincompletepush' as safe #3113 (@ttaylorr) +* fs: support multiple object alternates #3116 (@ttaylorr) +* commands/checkout: checkout over read-only files #3120 (@ttaylorr) +* test/testhelpers.sh: look for 64 character SHA-256's #3119 (@ttaylorr) +* config/config.go: case-insensitive error search #3098 (@ttaylorr) +* Encoded characters in pathnames #3093 (@cbuehlmann) +* Support default TTL for authentication tokens acquired via SSH #2867 (@NoEffex) +* commands/status.go: relative paths outside of root #3080 (@ttaylorr) +* Run `go generate` on commands in deb build #3083 (@ssgelm) +* lfsapi: prefer proxying from gitconfig before environment #3062 (@ttaylorr) +* commands/track: respect global- and system-level gitattributes #3076 (@ttaylorr) +* git/git.go: pass --multiple to git-fetch(1) when appropriate #3063 (@ttaylorr) +* commands/checkout: fix inaccurate messaging #3055 (@ttaylorr) +* commands/migrate: do not migrate empty commits #3054 (@ttaylorr) +* git/odb: retain trailing newlines in commit messages #3053 (@ttaylorr) + +### Misc + +* Set original file content type on basic upload. 
#3137 (@calavera) +* README.md: Git for Windows ships LFS by default #3112 (@larsxschneider) +* change lfs.allowincompletepush default from true to false #3109 (@larsxschneider) +* *: replace git/odb with vendored copy #3108 (@ttaylorr) +* test/test-ls-files.sh: skip on CircleCI #3101 (@ttaylorr) +* lfsapi/ssh.go: use zero-value sentinels #3099 (@ttaylorr) +* README.md: add link to installation wiki page #3075 (@ttaylorr) +* docs/man/git-lfs.1.ronn: update casing and missing commands #3059 (@ttaylorr) +* commands/checkout: mark 'git lfs checkout' as deprecated #3056 (@ttaylorr) + +## 2.4.2 (28 May, 2018) + +### Bugs + +* lfsapi: re-authenticate HTTP redirects when needed #3028 (@ttaylorr) +* lfsapi: allow unknown keywords in netrc file(s) #3027 (@ttaylorr) + +## 2.4.1 (18 May, 2018) + +This release fixes a handful of bugs found and fixed since v2.4.0. In +particular, Git LFS no longer panic()'s after invalid API responses, can +correctly run 'fetch' on SHAs instead of references, migrates symbolic links +correctly, and avoids writing to `$HOME/.gitconfig` more than is necessary. + +We would like to extend a "thank you" to the following contributors for their +gracious patches: + +- @QuLogic fixed an issue with running tests that require credentials +- @patrickmarlier made it possible for 'git lfs migrate import' to work + correctly with symbolic links. +- @zackse fixed an inconsistency in `CONTRIBUTING.md` +- @zanglang fixed an inconsistency in `README.md` + +Git LFS would not be possible without generous contributions from the +open-source community. For these, and many more: thank you! + +### Features + +* script/packagecloud.rb: release on Ubuntu Bionic #2961 (@ttaylorr) + +### Bugs + +* lfsapi: canonicalize extra HTTP headers #3010 (@ttaylorr) +* commands/lock: follow symlinks before locking #2996 (@ttaylorr) +* lfs/attribute.go: remove default value from upgradeables #2994 (@ttaylorr) +* git: include SHA1 in ref-less revisions #2982 (@ttaylorr) +* Do not migrate the symlinks to LFS objects. #2983 (@patrickmarlier) +* commands/uninstall: do not log about global hooks with --local #2976 (@ttaylorr) +* commands/run.go: exit 127 on unknown sub-command #2969 (@ttaylorr) +* commands/{un,}track: perform "prefix-agnostic" comparisons #2955 (@ttaylorr) +* commands/migrate: escape paths before .gitattributes #2933 (@ttaylorr) +* commands/ls-files: do not accept '--all' after '--' #2932 (@ttaylorr) +* tq: prevent uint64 underflow with invalid API response #2902 (@ttaylorr) + +### Misc + +* test/test-env: skip comparing GIT_EXEC_PATH #3015 (@ttaylorr) +* remove reference to CLA from contributor's guide #2997 (@zackse) +* .gitattributes link is broken #2985 (@zanglang) +* commands: make --version a synonym for 'version' #2968, #3017 (@ttaylorr) +* test: ensure that git-mergetool(1) works with large files #2939 (@ttaylorr) +* README.md: note the correct PackageCloud URL #2960 (@ttaylorr) +* README.md: mention note about `git lfs track` retroactively #2948 (@ttaylorr) +* README.md: reorganize into Core Team, Alumni #2941 (@ttaylorr) +* README.md: :nail_care: #2942 (@ttaylorr) +* circle.yml: upgrade to 'version: 2' syntax #2928 (@ttaylorr) +* Use unique repo name for tests that require credentials. #2901 (@QuLogic) + +## 2.4.0 (2 March, 2018) + +This release introduces a rewrite of the underlying file matching engine, +expands the API to include relevant refspecs for individual requests, +standardizes the progress output among commands, and more. 
+
+Please note: in the next MAJOR release (v3.0.0) the semantic meaning behind
+`--include` and `--exclude` flags will change. As the details of exactly which
+existing patterns will no longer function as previously become known, we will
+indicate them here. Any `--include` or `--exclude` patterns used in v2.3.0 or
+earlier are expected to work as previously in this release.
+
+This release would not be possible without the open-source community.
+Specifically, we would like to thank:
+
+- @larsxschneider: for contributing fixes to the filter operation in `git lfs
+  fsck`, and `git lfs prune`, as well as the bug report leading to the
+  filepathfilter changes.
+- @yfronto: for adding new Linux release targets.
+- @stffabi: for adding support for NTLM with SSPI on Windows.
+- @jeffreydwalter: for fixing memory alignment issues with `sync/atomic` on
+  32-bit architectures.
+- @b4mboo: for adding an LFS configuration key to the list of safe configuration
+  options.
+
+Without the aforementioned individuals, this release would not have been
+possible. Thank you!
+
+### Features
+
+* __Support wildmatch-compliant options in `--include`, `--exclude`__
+  * filepathfilter: implement using wildmatch #2875 (@ttaylorr)
+  * test: add wildmatch migration tests #2888 (@larsxschneider, @ttaylorr)
+* __Expand the specification to include relevant refspecs__
+  * verify locks against each ref being pushed #2706 (@technoweenie)
+  * Batch send refspec take 2 #2809 (@technoweenie)
+  * Run 1 TransferQueue per uploaded ref #2806 (@technoweenie)
+  * Locks/verify: full refspec #2722 (@technoweenie)
+  * send remote refspec for the other lock commands #2773 (@technoweenie)
+* __Standardize progress meter output and implementation__
+  * tq: standardized progress meter formatting #2811 (@ttaylorr)
+  * commands/fetch: unify formatting #2758 (@ttaylorr)
+  * commands/prune: unify formatting #2757 (@ttaylorr)
+  * progress: use git/githistory/log package for formatting #2732 (@ttaylorr)
+  * progress: remove `*progress.Meter` #2762 (@ttaylorr)
+  * tasklog: teach `*Logger` how to enqueue new `*SimpleTask`'s #2767 (@ttaylorr)
+  * progress: remove spinner.go #2759 (@ttaylorr)
+* __Teach new flags, functionality to `git lfs ls-files`__
+  * commands: teach '--all' to `git lfs ls-files` #2796 (@ttaylorr)
+  * commands/ls-files: show cached, tree-less LFS objects #2795 (@ttaylorr)
+  * commands/ls-files: add --include, --exclude #2793 (@ttaylorr)
+  * commands/ls-files: add '--size' flag #2764 (@ttaylorr)
+* __Add new flags, functionality to `git lfs migrate`__
+  * commands/migrate: support '^'-prefix refspec in arguments #2785 (@ttaylorr)
+  * commands/migrate: add '--skip-fetch' for offline migrations #2738 (@ttaylorr)
+  * git: prefer sending revisions over STDIN than arguments #2739 (@ttaylorr)
+* __Release to new operating systems__
+  * release lfs for ubuntu/artful too #2704 (@technoweenie)
+  * Adding Mint Sylvia to packagecloud.rb script #2829 (@yfronto)
+* __New functionality in package `lfsapi`__
+  * NTLM authentication with SSPI on windows #2871 (@stffabi)
+  * lfsapi/auth: teach DoWithAuth to respect http.extraHeaders #2733 (@ttaylorr)
+  * add support for url-specific proxies #2651 (@technoweenie)
+* __Code cleanup in git.Config, package `localstorage`__
+  * Tracked remote #2700 (@technoweenie)
+  * Replace git.Config #2692 (@technoweenie)
+  * Replace localstorage #2689 (@technoweenie)
+  * Remove last global config #2687 (@technoweenie)
+  * Git config refactor #2676 (@technoweenie)
+
+### Bugs
+
+* all: fix 32-bit alignment issues
with `sync/atomic` #2883 (@ttaylorr) +* all: memory alignment issues on 32-bit systems. #2880 (@jeffreydwalter) +* command/migrate: don't migrate remote references in bare repositories #2769 (@ttaylorr) +* commands/ls-files: behave correctly before initial commit #2794 (@ttaylorr) +* commands/migrate: allow for ambiguous references in migrations #2734 (@ttaylorr) +* commands: fill in missing printf arg #2678 (@technoweenie) +* config: Add `lfs.locksverify` to safe keys. #2797 (@b4mboo) +* don't replace pointers with objects if clean filter is not configured #2626 (@technoweenie) +* fsck: attach a filter to exclude unfetched items from fsck #2847 (@larsxschneider) +* git/githistory: copy entries from cache, elsewhere #2884 (@ttaylorr) +* git/githistory: migrate annotated tags correctly #2780 (@ttaylorr) +* git/odb: don't print extra newline after commit message #2784 (@ttaylorr) +* git/odb: extract identifiers from commits verbatim #2751 (@wsprent) +* git/odb: implement parsing for annotated `*Tag`'s #2778 (@ttaylorr) +* git/odb: retain newlines when parsing commit messages #2786 (@ttaylorr) +* lfs: PointerScanner is nil after error, so don't close #2699 (@technoweenie) +* lfsapi: Cred helper improvements #2695 (@technoweenie) +* lfsapi: retry requests changing access from none IF Auth header is empty #2621 (@technoweenie) +* prune: always prune excluded paths #2851 (@larsxschneider) +* status: fix incorrect formatting with unpushed objects #2746 (@ttaylorr) +* tasklog: don't drop updates in PercentageTask #2755 (@ttaylorr) +* test: Fix integration test early exit #2735 (@technoweenie) +* test: generate random repo names with fs-safe characters #2698 (@technoweenie) + +### Misc + +* all: Nitpicks #2821 (@technoweenie) +* all: introduce package 'tlog' #2747 (@ttaylorr) +* all: remove CLA #2870 (@MikeMcQuaid) +* build: Specify the embedded Windows icon as part of versioninfo.json #2770 (@sschuberth) +* config,test: Testlib no global config #2709 (@mathstuf) +* config: add PushRemote() for checking `branch.*.pushRemote` and `remote.pushDefault` first #2715 (@technoweenie) +* docs: Added documentation for git-lfs-ls-files' `*/-` output. #2719 (@bilke) +* docs: Uninstall man page improvements #2730 (@dpursehouse) +* docs: Update usage info for post-checkout #2830 (@proinsias) +* docs: add 'git lfs prune' to main man page #2849 (@larsxschneider) +* docs: use consistent casing for Git #2850 (@larsxschneider) +* git/githistory: have `*RefUpdater` hold `*odb.ObjectDatabase` reference #2779 (@ttaylorr) +* progress: move CopyCallback (& related) to package 'tools' #2749 (@ttaylorr) +* progress: move `*progressLogger` implementation to package 'tools' #2750 (@ttaylorr) +* refspec docs #2820 (@technoweenie) +* script/test: run 'go tool vet' during testing #2788 (@ttaylorr) +* tasklog: introduce `*SimpleTask` #2756 (@ttaylorr) +* test: Ignore comment attr lines #2708 (@mathstuf) +* test: Wait longer for test lfs server to start. 
#2716 (@QuLogic) +* test: ensure commented attr lines are ignored #2736 (@ttaylorr) +* tools/humanize: add 'FormatByteRate' to format transfer speed #2810 (@ttaylorr) +* vendor: update 'xeipuuv/gojsonpointer' #2846 (@ttaylorr) + +## 2.3.4 (18 October, 2017) + +### Features + +* 'git lfs install' updates filters with 'skip-smudge' option #2673 (@technoweenie) + +### Bugs + +* FastWalkGitRepo: limit number of concurrent goroutines #2672 (@technoweenie) +* handle scenario where multiple configuration values exist in ~/.gitconfig #2659 (@shiftkey) + +## 2.3.3 (9 October, 2017) + +### Bugs + +* invoke lfs for 'git update-index', fixing 'status' issues #2647 (@technoweenie) +* cache http credential helper output by default #2648 (@technoweenie) + +## 2.3.2 (3 October, 2017) + +### Features + +* bump default activity timeout from 10s -> 30s #2632 (@technoweenie) + +### Bugs + +* ensure files are marked readonly after unlocking by ID #2642 (@technoweenie) +* add files to index with path relative to current dir #2641 (@technoweenie) +* better Netrc errors #2633 (@technoweenie) +* only use askpass if credential.helper is not configured #2637 (@technoweenie) +* convert backslash to slash when writing to .gitattributes #2625 (@technoweenie) + +### Misc + +* only copy req headers if there are git-configured extra headers #2622 (@technoweenie) +* update tracerx to add timestamps #2620 (@rubyist) + +## 2.3.1 (27 September, 2017) + +### Features + +* add support for SSH_ASKPASS #2609 (@technoweenie) +* `git lfs migrate --verbose` option #2610 (@technoweenie) +* Support standalone custom transfer based on API URL prefix match #2590 (@sprohaska) + +### Bugs + +* Improve invalid URL error messages #2614 (@technoweenie) +* Fix double counting progress bug #2608 (@technoweenie) +* trim whitespace from GIT_ASKPASS provided passwords #2607 (@technoweenie) +* remove mmap usage in Packfile reader #2600 (@technoweenie) +* `git lfs clone`: don't fetch for unborn repositories #2598 (@shiftkey) + +### Misc + +* Windows Installer fixes: + * Show proper icon in add/remove programs list #2585 (@shiftkey) + * Make the Inno Setup installer script explicitly check for the binaries #2588 (@sschuberth) + * Improve compile-win-installer-unsigned.bat a bit #2586 (@sschuberth) +* Update migrate docs example for multiple file types #2596 (@technoweenie) + +## 2.3.0 (14 September, 2017) + +Git LFS v2.3.0 includes performance optimizations for the `git-lfs-migrate(1)` +and `git-clone(1)` commands, new features, bug-fixes, and more. + +This release was made possible by contributors to Git LFS. Specifically: + +- @aleb: added support for "standalone" transfer agents, for using `rsync(1)` + and similar with Git LFS. +- @bozaro: added support for custom `.git/lfs/objects` directories via the + `lfs.storage` configuration option. +- @larsxschneider: fixed a recursive process leak when shelling out to Git, + added new features to `git lfs ls-files`, extra information in error + messages used for debugging, documentation changes and more. +- @mathstuf: contributed a documentation change clarifying LFS's handling of + empty pointer files. +- @rudineirk and @andyneff: updated our release process to build packages for + fedora/26. +- @ssgelm: ensured that LFS is able to be released on Ubuntu Universe. + +To everyone who has contributed to this or previous releases of Git LFS: Thank +you! 
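+
+As an illustration of the storage override mentioned above (@bozaro's
+`lfs.storage` option), the object directory can be relocated with a single
+configuration setting. This is only a minimal sketch; the path below is an
+arbitrary example:
+
+```
+# Sketch: keep LFS objects somewhere other than .git/lfs/objects (example path).
+$ git config lfs.storage /mnt/big-disk/lfs-objects
+```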
+ +### Features + +* git/odb/pack: improve `git lfs migrate` performance + * git/odb/pack: introduce packed object reassembly #2550 #2551 #2552 #2553 #2554 (@ttaylorr) + * git/odb/pack: teach packfile index entry lookups #2420 #2421 #2422 #2423 #2437 #2441 #2461 (@ttaylorr) + * git/{odb,githistory}: don't write unchanged objects #2541 (@ttaylorr) +* commands: improve `git clone` performance with 'delay' capability #2511 #2469 #2468 #2471 #2467 #2476 #2483 (@ttaylorr) + * commands: mark `git lfs clone` as deprecated #2526 (@ttaylorr) +* commands: enable `lfs.allowincompletepush` by default #2574 (@technoweenie) +* commands: teach '--everything' to `git lfs migrate` #2558 (@ttaylorr) +* commands: teach `git lfs ls-files` a '--debug' option #2540 (@larsxschneider) +* commands,lfs: warn on 4gb size conversion during clean #2510 #2507 #2459 (@ttaylorr) +* lfsapi/creds: teach about GIT_ASKPASS and core.askpass #2500 #2578 (@ttaylorr) +* commands/status: indicate missing objects #2438 (@ttaylorr) +* Allow using custom transfer agents directly #2429 (@aleb) +* Add `lfs.storage` parameter for overriding LFS storage location #2023 (@bozaro) +* lfsapi: enable credential caching by default #2508 (@ttaylorr) +* commands/install: teach `--manual` to `git-lfs-install(1)` #2410 (@ttaylorr) + +### Bugs + +* migrate: fix migrations with subdirectories in '--include' or '--exclude' #2485 (@ttaylorr) +* commands/migrate: fix hardlinking issue when different filesystem is mounted at `/tmp` #2566 (@ttaylorr) +* commands: make `git lfs migrate` fetch ref updates before migrating #2538 (@ttaylorr) +* commands: remove '--above=1mb' default from `git lfs migrate info` #2460 (@ttaylorr) +* filepathfilter: fix `HasPrefix()` when no '--include' filters present #2579 (@technoweenie) +* git/githistory/log: fix race condition with `git/githistory/log` tests #2495 (@ttaylorr) +* git/odb: fix closing object database test #2457 (@ttaylorr) +* git/githistory: only update local refs after migrations #2559 (@ttaylorr) +* locking: fix unlocking files not removing write flag #2514 (@ttaylorr) +* locks: fix unlocking files in a symlinked directory #2505 (@ttaylorr) +* commands: teach `git lfs unlock` to ignore status errs in appropriate conditions #2475 (@ttaylorr) +* git: expand `GetAttributePaths` check to include non-LFS lockables #2528 (@ttaylorr) +* fix multiple `git update-index` invocations #2531 (@larsxschneider) +* tools: fix SSH credential cacher expiration #2530 (@ttaylorr) +* lfsapi: fix read/write race condition in credential cacher #2493 (@ttaylorr) +* lfs: fix cleaning contents larger than 1024 bytes over stdin #2488 (@ttaylorr) +* fsck only scans current version of objects #2049 (@TheJare) +* progress: fix writing updates to `$GIT_LFS_PROGRESS` #2465 (@ttaylorr) +* commands/track: resolve symlinks before comparing attr paths #2463 (@ttaylorr) +* test: ensure that empty pointers are empty #2458 (@ttaylorr) +* git/githistory/log: prevent 'NaN' showing up in `*PercentageTask` #2455 (@ttaylorr) +* tq: teach Batch() API to retry itself after io.EOF's #2516 (@ttaylorr) + +### Misc + +* script/packagecloud: release LFS on Fedora/26 #2443 #2509 (@rudineirk, @andyneff) +* git/githistory: change "Rewriting commits" when not updating refs #2577 (@ttaylorr) +* commands: print IP addresses in error logs #2570 (@larsxschneider) +* commands: print current time in UTC to error logs #2571 (@larsxschneider) +* commands: Disable lock verification when using a standalone custom-tr… #2499 (@aleb) +* docs/man: update `git lfs migrate` 
documentation with EXAMPLES #2580 (@technoweenie) +* docs/man: recommend global per-host locking config #2546 (@larsxschneider) +* commands: use transfer queue's batch size instead of constant #2529 (@ttaylorr) +* add function to invoke Git with disabled LFS filters #2453 (@larsxschneider) +* config: warn on unsafe keys in `.lfsconfig` #2502 (@ttaylorr) +* glide: remove unused dependencies #2501 (@ttaylorr) +* script/build: pass '-{ld,gc}flags' to compiler, if given #2462 (@ttaylorr) +* spec: mention that an empty file is its own LFS pointer #2449 (@mathstuf) +* Update to latest version of github.com/pkg/errors #2426 (@ssgelm) +* Update gitignore to add some temp files that get created when building debs #2425 (@ssgelm) +* lfs: indent contents of `git lfs install`, `update` #2392 (@ttaylorr) +* tq: increase default `lfs.concurrenttransfers` to 8 #2506 (@ttaylorr) + +## 2.2.1 (10 July, 2017) + +### Bugs + +* git lfs status --json only includes lfs files #2374 (@asottile) +* git/odb: remove temporary files after migration #2388 (@ttaylorr) +* git/githistory: fix hanging on empty set of commits #2383 (@ttaylorr) +* migrate: don't checkout HEAD on bare repositories #2389 (@ttaylorr) +* git/odb: prevent cross-volume link error when saving objects #2382 (@ttaylorr) +* commands: only pass --jobs to `git clone` if set #2369 (@technoweenie) + +### Misc + +* lfs: trace hook install, uninstall, upgrade #2393 (@ttaylorr) +* vendor: remove github.com/cheggaaa/pb #2386 (@ttaylorr) +* Use FormatBytes from git-lfs/tools/humanize instead of cheggaaa/pb #2377 (@ssgelm) + + +## 2.2.0 (27 June, 2017) + +Git LFS v2.2.0 includes bug fixes, minor features, and a brand new `migrate` +command. The `migrate` command rewrites commits, converting large files from +Git blobs to LFS objects. The most common use case will fix a git push rejected +for having large blobs: + +``` +$ git push origin master +# ... +remote: error: file a.psd is 1.2 gb; this exceeds github's file size limit of 100.00 mb +to github.com:ttaylorr/demo.git + ! [remote rejected] master -> master (pre-receive hook declined) +error: failed to push some refs to 'git@github.com:ttaylorr/demo.git' + +$ git lfs migrate info +*.psd 1.2 GB 27/27 files(s) 100% + +$ git lfs migrate import --include="*.psd" +migrate: Sorting commits: ..., done +migrate: Rewriting commits: 100% (810/810), done + master f18bb746d44e8ea5065fc779bb1acdf3cdae7ed8 -> 35b0fe0a7bf3ae6952ec9584895a7fb6ebcd498b +migrate: Updating refs: ..., done + +$ git push origin +Git LFS: (1 of 1 files) 1.2 GB / 1.2 GB +# ... +To github.com:ttaylorr/demo.git + * [new branch] master -> master +``` + +The `migrate` command has detailed options described in the `git-lfs-migrate(1)` +man page. Keep in mind that this is the first pass at such a command, so we +expect there to be bugs and performance issues (especially on long git histories). +Future updates to the command will be focused on improvements to allow full +LFS transitions on large repositories. 
+
+### Features
+
+* commands: add git-lfs-migrate(1) 'import' subcommand #2353 (@ttaylorr)
+* commands: add git-lfs-migrate(1) 'info' subcommand #2313 (@ttaylorr)
+* Implement status --json #2311 (@asottile)
+* commands/uploader: allow incomplete pushes #2199 (@ttaylorr)
+
+### Bugs
+
+* Retry on timeout or temporary errors #2312 (@jakub-m)
+* commands/uploader: don't verify locks if verification is disabled #2278 (@ttaylorr)
+* Fix tools.TranslateCygwinPath() on MSYS #2277 (@raleksandar)
+* commands/clone: add new flags since Git 2.9 #2251, #2252 (@ttaylorr)
+* Make pull return non-zero error code when some downloads failed #2237 (@seth2810)
+* tq/basic_download: guard against nil HTTP response #2227 (@ttaylorr)
+* Bugfix: cannot push to scp style URL #2198 (@jiangxin)
+* support lfs.<url>.* values where url does not include .git #2192 (@technoweenie)
+* commands: fix logged error not interpolating format qualifiers #2228 (@ttaylorr)
+* commands/help: print helptext to stdout for consistency with Git #2210 (@ttaylorr)
+
+### Misc
+
+* Minor cleanups in help index #2248 (@dpursehouse)
+* Add git-lfs-lock and git-lfs-unlock to help index #2232 (@dpursehouse)
+* packagecloud: add Debian 9 entry to formatted list #2211 (@ttaylorr)
+* Update Xenial is to use stretch packages #2212 (@andyneff)
+
+## 2.1.1 (19 May, 2017)
+
+Git LFS v2.1.1 ships with bug fixes and a security patch fixing a remote code
+execution vulnerability exploitable by setting an SSH remote via your
+repository's `.lfsconfig` to contain the string "-oProxyCommand". This
+vulnerability is only exploitable if an attacker has write access to your
+repository, or you clone a repository with a `.lfsconfig` file containing that
+string.
+
+### Bugs
+
+* Make pull return non-zero error code when some downloads failed #2245 (@seth2810, @technoweenie)
+* lfsapi: support cross-scheme redirection #2243 (@ttaylorr)
+* sanitize ssh options parsed from ssh:// url #2242 (@technoweenie)
+* filepathfilter: interpret as .gitignore syntax #2238 (@technoweenie)
+* tq/basic_download: guard against nil HTTP response #2229 (@ttaylorr)
+* commands: fix logged error not interpolating format qualifiers #2230 (@ttaylorr)
+
+### Misc
+
+* release: backport Debian 9-related changes #2244 (@ssgelm, @andyneff, @ttaylorr)
+* Add git-lfs-lock and git-lfs-unlock to help index #2240 (@dpursehouse)
+* config: allow multiple environments when calling config.Unmarshal #2224 (@ttaylorr)
+
+## 2.1.0 (28 April, 2017)
+
+### Features
+
+* commands/track: teach --no-modify-attrs #2175 (@ttaylorr)
+* commands/status: add blob info to each entry #2070 (@ttaylorr)
+* lfsapi: improve HTTP request/response stats #2184 (@technoweenie)
+* all: support URL-style configuration lookups (@ttaylorr)
+  * commands: support URL-style lookups for `lfs.{url}.locksverify` #2162 (@ttaylorr)
+  * lfsapi: support URL-style lookups for `lfs.{url}.access` #2161 (@ttaylorr)
+  * lfsapi/certs: use `*config.URLConfig` to do per-host config lookup #2160 (@ttaylorr)
+  * lfsapi: support for http.<url>.extraHeader #2159 (@ttaylorr)
+  * config: add prefix to URLConfig type #2158 (@ttaylorr)
+  * config: remove dependency on lfsapi package #2156 (@ttaylorr)
+  * config: support multi-value lookup on URLConfig #2154 (@ttaylorr)
+  * lfsapi: initial httpconfig type #1912 (@technoweenie, @ttaylorr)
+* lfsapi,tq: relative expiration support #2130 (@ttaylorr)
+
+### Bugs
+
+* commands: include error in `LoggedError()` #2179 (@ttaylorr)
+* commands: cross-platform log formatting to files #2178 (@ttaylorr)
+* locks:
cross-platform path normalization #2139 (@ttaylorr)
+* commands,locking: don't disable locking for auth errors during verify #2110 (@ttaylorr)
+* commands/status: show partially staged files twice #2067 (@ttaylorr)
+
+### Misc
+
+* all: build on Go 1.8.1 #2145 (@ttaylorr)
+* Polish custom-transfers.md #2171 (@sprohaska)
+* commands/push: Fix typo in comment #2170 (@sprohaska)
+* config: support multi-valued config entries #2152 (@ttaylorr)
+* smudge: use localstorage temp directory, not system #2140 (@ttaylorr)
+* locking: send locks limit to server #2107 (@ttaylorr)
+* lfs: extract `DiffIndexScanner` #2035 (@ttaylorr)
+* status: use DiffIndexScanner to populate results #2042 (@ttaylorr)
+
+## 2.0.2 (29 March, 2017)
+
+### Features
+
+* ssh auth and credential helper caching #2094 (@ttaylorr)
+* commands,tq: specialized logging for missing/corrupt objects #2085 (@ttaylorr)
+* commands/clone: install repo-level hooks after `git lfs clone` #2074 (@ttaylorr)
+* debian: Support building on armhf and arm64 #2089 (@p12tic)
+
+### Bugs
+
+* commands,locking: don't disable locking for auth errors during verify #2111 (@ttaylorr)
+* commands: show real error while cleaning #2096 (@ttaylorr)
+* lfsapi/auth: optionally prepend an empty scheme to Git remote URLs #2092 (@ttaylorr)
+* tq/verify: authenticate verify requests if required #2084 (@ttaylorr)
+* commands/{,un}track: correctly escape '#' and ' ' characters #2079 (@ttaylorr)
+* tq: use initialized lfsapi.Client instances in transfer adapters #2048 (@ttaylorr)
+
+### Misc
+
+* locking: send locks limit to server #2109 (@ttaylorr)
+* docs: update configuration documentation #2097 #2019 #2102 (@terrorobe)
+* docs: update locking API documentation #2099 #2101 (@dpursehouse)
+* fixed table markdown in README.md #2095 (@ZaninAndrea)
+* remove the the duplicate work #2098 (@grimreaper)
+
+## 2.0.1 (6 March, 2017)
+
+### Misc
+
+* tq: fallback to `_links` if present #2007 (@ttaylorr)
+
+## 2.0.0 (1 March, 2017)
+
+Git LFS v2.0.0 brings a number of important bug fixes, some new features, and
+a lot of internal refactoring. It also completely removes old APIs that were
+deprecated in Git LFS v0.6.
+
+### Locking
+
+File Locking is a brand new feature that lets teams communicate when they are
+working on files that are difficult to merge. Users are not able to edit or push
+changes to any files that are locked by other users. While the feature has been
+in discussion for a year, we are releasing a basic Locking implementation to
+solicit feedback from the community.
+
+### Transfer Queue
+
+LFS 2.0 introduces a new Git Scanner, which walks a range of Git commits looking
+for LFS objects to transfer. The Git Scanner is now asynchronous, initiating
+large uploads or downloads in the Transfer Queue immediately once an LFS object
+is found. Previously, the Transfer Queue waited until all of the Git commits
+had been scanned before initiating the transfer. The Transfer Queue also
+automatically retries failed uploads and downloads more often.
+
+### Deprecations
+
+Git LFS v2.0.0 also drops support for the legacy API introduced in v0.5.0. If
+you're still using LFS servers on the old API, you'll have to stick to v1.5.6.
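+
+As a rough sketch of the basic workflow for the File Locking feature described
+above (the path is hypothetical, and the server must implement the locking
+API):
+
+```
+# Sketch: lock a hard-to-merge file, list current locks, then release the lock.
+$ git lfs lock images/banner.psd
+$ git lfs locks
+$ git lfs unlock images/banner.psd
+```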
+ +### Features + +* Mid-stage locking support #1769 (@sinbad) +* Define lockable files, make read-only in working copy #1870 (@sinbad) +* Check that files are not uncommitted before unlock #1896 (@sinbad) +* Fix `lfs unlock --force` on a missing file #1927 (@technoweenie) +* locking: teach pre-push hook to check for locks #1815 (@ttaylorr) +* locking: add `--json` flag #1814 (@ttaylorr) +* Implement local lock cache, support querying it #1760 (@sinbad) +* support for client certificates pt 2 #1893 (@technoweenie) +* Fix clash between progress meter and credential helper #1886 (@technoweenie) +* Teach uninstall cmd about --local and --system #1887 (@technoweenie) +* Add `--skip-repo` option to `git lfs install` & use in tests #1868 (@sinbad) +* commands: convert push, pre-push to use async gitscanner #1812 (@ttaylorr) +* tq: prioritize transferring retries before new items #1758 (@ttaylorr) + +### Bugs + +* ensure you're in the correct directory when installing #1793 (@technoweenie) +* locking: make API requests relative to repository, not root #1818 (@ttaylorr) +* Teach 'track' about CRLF #1914 (@technoweenie) +* Teach 'track' how to handle empty lines in .gitattributes #1921 (@technoweenie) +* Closing stdout pipe before function return #1861 (@monitorjbl) +* Custom transfer terminate #1847 (@sinbad) +* Fix Install in root problems #1727 (@technoweenie) +* cat-file batch: read all of the bytes #1680 (@technoweenie) +* Fixed file paths on cygwin. #1820, #1965 (@creste, @ttaylorr) +* tq: decrement uploaded bytes in basic_upload before retry #1958 (@ttaylorr) +* progress: fix never reading bytes with sufficiently small files #1955 (@ttaylorr) +* tools: fix truncating string fields between balanced quotes in GIT_SSH_COMMAND #1962 (@ttaylorr) +* commands/smudge: treat empty pointers as empty files #1954 (@ttaylorr) + +### Misc + +* all: build using Go 1.8 #1952 (@ttaylorr) +* Embed the version information into the Windows executable #1689 (@sschuberth) +* Add more meta-data to the Windows installer executable #1752 (@sschuberth) +* docs/api: object size must be positive #1779 (@ttaylorr) +* build: omit DWARF tables by default #1937 (@ttaylorr) +* Add test to prove set operator [] works in filter matching #1768 (@sinbad) +* test: add ntlm integration test #1840 (@technoweenie) +* lfs/tq: completely remove legacy support #1686 (@ttaylorr) +* remove deprecated features #1679 (@technoweenie) +* remove legacy api support #1629 (@technoweenie) + +## 1.5.6 (16 February, 2017) + +## Bugs + +* Spool malformed pointers to avoid deadlock #1932 (@ttaylorr) + +## 1.5.5 (12 January, 2017) + +### Bugs + +* lfs: only buffer first 1k when creating a CleanPointerError #1856 (@ttaylorr) + +## 1.5.4 (27 December, 2016) + +### Bugs + +* progress: guard negative padding width, panic in `strings.Repeat` #1807 (@ttaylorr) +* commands,lfs: handle malformed pointers #1805 (@ttaylorr) + +### Misc + +* script/packagecloud: release LFS on fedora/25 #1798 (@ttaylorr) +* backport filepathfilter to v1.5.x #1782 (@technoweenie) + +## 1.5.3 (5 December, 2016) + +### Bugs + +* Support LFS installations at filesystem root #1732 (@technoweenie) +* git: parse filter process header values containing '=' properly #1733 (@larsxschneider) +* Fix SSH endpoint parsing #1738 (@technoweenie) + +### Misc + +* build: release on Go 1.7.4 #1741 (@ttaylorr) + +## 1.5.2 (22 November, 2016) + +### Features + +* Release LFS on Fedora 24 #1685 (@technoweenie) + +### Bugs + +* filter-process: fix reading 1024 byte files #1708 (@ttaylorr) +* Support 
long paths on Windows #1705 (@technoweenie) + +### Misc + +* filter-process: exit with error if we detect an unknown command from Git #1707 (@ttaylorr) +* vendor: remove contentaddressable lib #1706 (@technoweenie) + +## 1.5.1 (18 November, 2016) + +### Bugs + +* cat-file --batch parser errors on non-lfs git blobs #1680 (@technoweenie) + +## 1.5.0 (17 November, 2016) + +### Features + +* Filter Protocol Support #1617 (@ttaylorr, @larsxschneider) +* Fast directory walk #1616 (@sinbad) +* Allow usage of proxies even when contacting localhost #1605 (@chalstrick) + +### Bugs + +* start reading off the Watch() channel before sending any input #1671 (@technoweenie) +* wait for remote ref commands to exit before returning #1656 (@jjgod, @technoweenie) + +### Misc + +* rewrite new catfilebatch implementation for upcoming gitscanner pkg #1650 (@technoweenie) +* refactor testutils.FileInput so it's a little more clear #1666 (@technoweenie) +* Update the lfs track docs #1642 (@technoweenie) +* Pre push tracing #1638 (@technoweenie) +* Remove `AllGitConfig()` #1634 (@technoweenie) +* README: set minimal required Git version to 1.8.5 #1636 (@larsxschneider) +* 'smudge --info' is deprecated in favor of 'ls-files' #1631 (@technoweenie) +* travis-ci: test GitLFS with ancient Git version #1626 (@larsxschneider) + +## 1.4.4 (24 October, 2016) + +### Bugs + +* transfer: more descriptive "expired at" errors #1603 (@ttaylorr) +* commands,lfs/tq: Only send unique OIDs to the Transfer Queue #1600 (@ttaylorr) +* Expose the result message in case of an SSH authentication error #1599 (@sschuberth) + +### Misc + +* AppVeyor: Do not build branches with open pull requests #1594 (@sschuberth) +* Update .mailmap #1593 (@dpursehouse) + +## 1.4.3 (17 October, 2016) + +### Bugs + +* lfs/tq: use extra arguments given to tracerx.Printf #1583 (@ttaylorr) +* api: correctly print legacy API warning to Stderr #1582 (@ttaylorr) + +### Misc + +* Test storage retries #1585 (@ttaylorr) +* Test legacy check retries behavior #1584 (@ttaylorr) +* docs: Fix a link to the legacy API #1579 (@sschuberth) +* Add a .mailmap file #1577 (@sschuberth) +* Add a large wizard image to the Windows installer #1575 (@sschuberth) +* Appveyor badge #1574 (@ttaylorr) + +## 1.4.2 (10 October, 2016) + +v1.4.2 brings a number of bug fixes and usability improvements to LFS. This +release also adds support for multiple retries within the transfer queue, making +transfers much more reliable. To enable this feature, see the documentation for +`lfs.transfer.maxretries` in `git-lfs-config(5)`. + +We'd also like to extend a special thank-you to @sschuberth who undertook the +process of making LFS's test run on Windows through AppVeyor. Now all pull +requests run tests on macOS, Linux, and Windows. 
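+
+For example, the retry behavior described above can be enabled with a single
+configuration setting (a sketch; the retry count shown here is arbitrary):
+
+```
+# Sketch: allow up to 5 attempts per object transfer.
+$ git config lfs.transfer.maxretries 5
+```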
+
+### Features
+
+* lfs: warn on usage of the legacy API #1564 (@ttaylorr)
+* use filepath.Clean() when comparing filenames to include/exclude patterns #1565 (@technoweenie)
+* lfs/transfer_queue: support multiple retries per object #1505, #1528, #1535, #1545 (@ttaylorr)
+* Automatically upgrade old filters instead of requiring --force #1497 (@sinbad)
+* Allow lfs.pushurl in .lfsconfig #1489 (@technoweenie)
+
+### Bugs
+
+* Use "sha256sum" on Windows #1566 (@sschuberth)
+* git: ignore non-root wildcards #1563 (@ttaylorr)
+* Teach status to recognize multiple files with identical contents #1550 (@ttaylorr)
+* Status initial commit #1540 (@sinbad)
+* Make path comparison robust against Windows short / long path issues #1523 (@sschuberth)
+* Allow fetch to run without a remote configured #1507 (@sschuberth)
+
+### Misc
+
+* travis: run tests on Go 1.7.1 #1568 (@ttaylorr)
+* Enable running tests on AppVeyor CI #1567 (@sschuberth)
+* Travis: Only install git if not installed yet #1557 (@sschuberth)
+* Windows test framework fixes #1522 (@sschuberth)
+* Simplify getting the absolute Git root directory #1518 (@sschuberth)
+* Add icons to the Windows installer #1504 (@sschuberth)
+* docs/man: reference git-lfs-pointer(1) in clean documentation #1503 (@ttaylorr)
+* Make AppVeyor CI for Windows work again #1506 (@sschuberth)
+* commands: try out RegisterCommand() #1495 (@technoweenie)
+
+## 1.4.1 (26 August, 2016)
+
+### Features
+
+* retry if file download failed #1454 (@larsxschneider)
+* Support wrapped clone in current directory #1478 (@ttaylorr)
+
+### Misc
+
+* Test `RetriableReader` #1482 (@ttaylorr)
+
+## 1.4.0 (19 August, 2016)
+
+### Features
+
+* Install LFS at the system level when packaged #1460 (@javabrett)
+* Fetch remote urls #1451 (@technoweenie)
+* add object Authenticated property #1452 (@technoweenie)
+* add support for `url.*.insteadof` in git config #1117, #1443 (@artagnon, @technoweenie)
+
+### Bugs
+
+* fix --include bug when multiple files have same lfs content #1458 (@technoweenie)
+* check the git version is ok in some key commands #1461 (@technoweenie)
+* fix duplicate error reporting #1445, #1453 (@dpursehouse, @technoweenie)
+* transfer/custom: encode "event" as lowercase #1441 (@ttaylorr)
+
+### Misc
+
+* docs/man: note GIT_LFS_PROGRESS #1469 (@ttaylorr)
+* Reword the description of HTTP 509 status #1467 (@dpursehouse)
+* Update fetch include/exclude docs for pattern matching #1455 (@ralfthewise)
+* config-next: API changes to the `config` package #1425 (@ttaylorr)
+* errors-next: Contextualize error messages #1463 (@ttaylorr, @technoweenie)
+* scope commands to not leak instances of themselves #1434 (@technoweenie)
+* Transfer manifest #1430 (@technoweenie)
+
+## 1.3.1 (2 August 2016)
+
+### Features
+
+* lfs/hook: teach `lfs.Hook` about `core.hooksPath` #1409 (@ttaylorr)
+
+### Bugs
+
+* distinguish between empty include/exclude paths #1411 (@technoweenie)
+* Fix sslCAInfo config lookup when host in config doesn't have a trailing slash #1404 (@dakotahawkins)
+
+### Misc
+
+* Use commands.Config instead of config.Config #1390 (@technoweenie)
+
+## 1.3.0 (21 July 2016)
+
+### Features
+
+* use proxy from git config #1173, #1358 (@jonmagic, @LizzHale, @technoweenie)
+* Enhanced upload/download of LFS content: #1265 #1279 #1297 #1303 #1367 (@sinbad)
+  * Resumable downloads using HTTP range headers
+  * Resumable uploads using [tus.io protocol](http://tus.io)
+  * Pluggable [custom transfer adapters](https://github.com/git-lfs/git-lfs/blob/master/docs/custom-transfers.md)
+* In git 2.9+, run "git lfs pull" in submodules after "git lfs clone" #1373 (@sinbad) +* cmd,doc,test: teach `git lfs track --{no-touch,verbose,dry-run}` #1344 (@ttaylorr) +* ⏳ Retry transfers with expired actions #1350 (@ttaylorr) +* Safe track patterns #1346 (@ttaylorr) +* Add checkout --unstaged flag #1262 (@orivej) +* cmd/clone: add include/exclude via flags and config #1321 (@ttaylorr) +* Improve progress reporting when files skipped #1296 (@sinbad) +* Experimental file locking commands #1236, #1259, #1256, #1386 (@ttaylorr) +* Implement support for GIT_SSH_COMMAND #1260 (@pdf) +* Recognize include/exclude filters from config #1257 (@ttaylorr) + +### Bugs + +* Fix bug in Windows installer under Win32. #1200 (@teo-tsirpanis) +* Updated request.GetAuthType to handle multi-value auth headers #1379 (@VladimirKhvostov) +* Windows fixes #1374 (@sinbad) +* Handle artifactory responses #1371 (@ttaylorr) +* use `git rev-list --stdin` instead of passing each remote ref #1359 (@technoweenie) +* docs/man: move "logs" subcommands from OPTIONS to COMMANDS #1335 (@ttaylorr) +* test/zero-len: update test for git v2.9.1 #1369 (@ttaylorr) +* Unbreak building httputil on OpenBSD #1360 (@jasperla) +* WIP transferqueue race fix #1255 (@technoweenie) +* Safety check to `comands.requireStdin` #1349 (@ttaylorr) +* Removed CentOS 5 from dockers. Fixed #1295. #1298 (@javabrett) +* Fix 'git lfs fetch' with a sha1 ref #1323 (@omonnier) +* Ignore HEAD ref when fetching with --all #1310 (@ttaylorr) +* Return a fully remote ref to reduce chances of ref clashes #1248 (@technoweenie) +* Fix reporting of `git update-index` errors in `git lfs checkout` and `git lfs pull` #1400 (@technoweenie) + +### Misc + +* Added Linux Mint Sarah to package cloud script #1384 (@andyneff) +* travis-ci: require successful tests against upcoming Git core release #1372 (@larsxschneider) +* travis-ci: add a build job to test against upcoming versions of Git #1361 (@larsxschneider) +* Create Makefiles for building with gccgo #1222 (@zeldin) +* README: add @ttaylorr to core team #1332 (@ttaylorr) +* Enforced a minimum gem version of 1.0.4 for packagecloud-ruby #1292 (@javabrett) +* I think this should be "Once installed" and not "One installed", but … #1305 (@GabLeRoux) +* script/test: propagate extra args to go test #1324 (@omonnier) +* Add `lfs.basictransfersonly` option to disable non-basic transfer adapters #1299 (@sinbad) +* Debian build vendor test excludes #1291 (@javabrett) +* gitignore: ignore lfstest-\* files #1271 (@ttaylorr) +* Disable gojsonschema test, causes failures when firewalls block it #1274 (@sinbad) +* test: use noop credential helper for auth tests #1267 (@ttaylorr) +* get git tests passing when run outside of repository #1229 (@technoweenie) +* Package refactor no.1 #1226 (@sinbad) +* vendor: vendor dependencies in vendor/ using Glide #1243 (@ttaylorr) + +## 1.2.1 (2 June 2016) + +### Features + +* Add missing config details to `env` command #1217 (@sinbad) +* Allow smudge filter to return 0 on download failure #1213 (@sinbad) +* Add `git lfs update --manual` option & promote it on hook install fail #1182 (@sinbad) +* Pass `git lfs clone` flags through to `git clone` correctly, respect some options #1160 (@sinbad) + +### Bugs + +* Clean trailing `/` from include/exclude paths #1278 (@ttaylorr) +* Fix problems with user prompts in `git lfs clone` #1185 (@sinbad) +* Fix failure to return non-zero exit code when lfs install/update fails to install hooks #1178 (@sinbad) +* Fix missing man page #1149 (@javabrett) +* fix 
concurrent map read and map write #1179 (@technoweenie) + +### Misc + +* Allow additional fields on request & response schema #1276 (@sinbad) +* Fix installer error on win32. #1198 (@teo-tsirpanis) +* Applied same -ldflags -X name value -> name=value fix #1193 (@javabrett) +* add instructions to install from MacPorts #1186 (@skymoo) +* Add xenial repo #1170 (@graingert) + +## 1.2.0 (14 April 2016) + +### Features + +* netrc support #715 (@rubyist) +* `git lfs clone` command #988 (@sinbad) +* Support self-signed certs #1067 (@sinbad) +* Support sslverify option for specific hosts #1081 (@sinbad) +* Stop transferring duplicate objects on major push or fetch operations on multiple refs. #1128 (@technoweenie) +* Touch existing git tracked files when tracked in LFS so they are flagged as modified #1104 (@sinbad) +* Support for git reference clones #1007 (@jlehtnie) + +### Bugs + +* Fix clean/smudge filter string for files starting with - #1083 (@epriestley) +* Fix silent failure to push LFS objects when ref matches a filename in the working copy #1096 (@epriestley) +* Fix problems with using LFS in symlinked folders #818 (@sinbad) +* Fix git lfs push silently misbehaving on ambiguous refs; fail like git push instead #1118 (@sinbad) +* Whitelist `lfs.*.access` config in local ~/.lfsconfig #1122 (@rjbell4) +* Only write the encoded pointer information to Stdout #1105 (@sschuberth) +* Use hardcoded auth from remote or lfs config when accessing the storage api #1136 (@technoweenie, @jonmagic) +* SSH should be called more strictly with command as one argument #1134 (@sinbad) + +## 1.1.2 (1 March, 2016) + +* Fix Base64 issues with `?` #989 (@technoweenie) +* Fix zombie git proc issue #1012 (@rlaakkol) +* Fix problems with files containing unicode characters #1016 (@technoweenie) +* Fix panic in `git cat-file` parser #1006 (@technoweenie) +* Display error messages in non-fatal errors #1028 #1039 #1042 (@technoweenie) +* Fix concurrent map access in progress meter (@technoweenie) + +## 1.1.1 (4 February, 2016) + +### Features + +* Add copy-on-write support for Linux BTRFS filesystem #952 (@bozaro) +* convert `git://` remotes to LFS servers automatically #964 (@technoweenie) +* Fix `git lfs track` handling of absolute paths. 
#975 (@technoweenie) +* Allow tunable http client timeouts #977 (@technoweenie) + +### Bugs + +* Suppress git config warnings for non-LFS keys #861 (@technoweenie) +* Fix fallthrough when `git-lfs-authenticate` returns an error #909 (@sinbad) +* Fix progress bar issue #883 (@pokehanai) +* Support `remote.name.pushurl` config #949 (@sinbad) +* Fix handling of `GIT_DIR` and `GIT_WORK_TREE` #963, #971 (@technoweenie) +* Fix handling of zero length files #966 (@nathanhi) +* Guard against invalid remotes passed to `push` and `pre-push` #974 (@technoweenie) +* Fix race condition in `git lfs pull` #972 (@technoweenie) + +### Extra + +* Add server API test tool #868 (@sinbad) +* Redo windows installer with innosetup #875 (@strich) +* Pre-built packages are built with Go v1.5.3 + +## 1.1.0 (18 November, 2015) + +* NTLM auth support #820 (@WillHipschman, @technoweenie) +* Add `prune` command #742 (@sinbad) +* Use .lfsconfig instead of .gitconfig #837 (@technoweenie) +* Rename "init" command #838 (@technoweenie) +* Raise error if credentials are needed #842 (@technoweenie) +* Support git repos in symlinked directories #818 (@sinbad, @difro, @jiangxin) +* Fix "git lfs env" to show correct SSH remote info #828 (@jiangxin) + +## 1.0.2 (28 October, 2015) + +* Fix issue with 'git lfs smudge' and the batch API. #795 (@technoweenie) +* Fix race condition in the git scanning code. #801 (@technoweenie) + +## 1.0.1 (23 October, 2015) + +* Downcase git config keys (prevents Auth loop) #690 (@WillHipschman) +* Show more info for unexpected http responses #710 (@rubyist) +* Use separate stdout/stderr buffers for `git-lfs-authenticate` #718 (@bozaro) +* Use LoggedError instead of Panic if update-index fails in checkout #735 (@sinbad) +* `smudge` command exits with non-zero if the download fails #732 (@rubyist) +* Use `git rev-parse` to find the git working dir #692 (@sinbad) +* Improved default remote behaviour & validation for fetch/pull #713 (@sinbad) +* Make fetch return error code when 1+ downloads failed #734 (@sinbad) +* Improve lfs.InRepo() detection in `init`/`update` #756 (@technoweenie) +* Teach smudge to use the batch api #711 (@rubyist) +* Fix not setting global attribute when needed to b/c of local state #765 (@sinbad) +* Fix clone fail when fetch is excluded globally #770 (@sinbad) +* Fix for partial downloads problem #763 (@technoweenie) +* Get integration tests passing on Windows #771 (@sinbad) + +### Security + +* Whitelist the valid keys read from .gitconfig #760 (@technoweenie) + +This prevents unsafe git configuration values from being used by Git LFS. + +## v1.0 (1 October, 2015) + +* Manual reference is integrated into the "help" options #665 @sinbad +* Fix `ls-files` when run from an empty repository #668 @Aorjoa +* Support listing duplicate files in `ls-files` #681 @Aorjoa @technoweenie +* `update` and `init` commands can install the pre-push hook in bare repositories #671 @technoweenie +* Add `GIT_LFS_SKIP_SMUDGE` and `init --skip-smudge` #679 @technoweenie + +## v0.6.0 (10 September, 2015) + +This is the first release that uses the new Batch API by default, while still +falling back to the Legacy API automatically. Also, new fetch/checkout/push +commands have been added. + +Run `git lfs update` in any local repositories to make sure all config settings +are updated. + +### Fetch + +* Rename old `git lfs fetch` command to `git lfs pull`. #527 (@sinbad) +* Add `git lfs checkout` #527 #543 #551 #566 (@sinbad) +* Add `git lfs fetch` for _just_ downloading objects. 
#527 (@sinbad) + * Add `remote` arg, and default to tracking remote instead of "origin". #583 (@sinbad) + * Support fetching multiple refs #542 (@sinbad) + * Add `--include` and `--exclude` flag for `git lfs fetch` #573 (@sinbad) + * Add `--recent` flag for downloading recent files outside of the current + checkout. #610 (@sinbad) + * Add `--all` option for download all objects from the server. #633 (@sinbad) +* Fix error handling while `git update-index` is running. #570 (@rubyist) + +See [git-lfs-fetch(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-fetch.1.ronn), +[git-lfs-checkout(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-checkout.1.ronn), +and [git-lfs-pull(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-pull.1.ronn) + for details. + +### Push + +* Support pushing multiple branches in the pre-push hook. #635 (@sinbad) +* Fix pushing objects from a branch that's not HEAD. #608 (@sinbad) +* Check server for objects before failing push because local is missing. #581 (@sinbad) +* Filter out commits from remote refs when pushing. #578 (@billygor) +* Support pushing all objects to the server, regardless of the remote ref. #646 (@technoweenie) +* Fix case where pre-push git hook exits with 0. #582 (@sinbad) + +See [git-lfs-push(1)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-push.1.ronn) for details. + +### API Clients + +* Fix some race conditions in the Batch API client. #577 #637 (@sinbad, @rubyist) +* Support retries in the Batch API client. #595 (@rubyist) +* Fix hanging batch client in certain error conditions. #594 (@rubyist) +* Treat 401 responses as errors in the Legacy API client. #634 (@rubyist) +* Fix bug in the Legacy API client when the object already exists on the server. #572 (@billygor) + +### Credentials + +* Fix how git credentials are checked in certain edge cases. #611 #650 #652 (@technoweenie) +* Send URI user to git credentials. #626 (@sinbad) +* Support git credentials with useHttpPath enabled. #554 (@clareliguori) + +### Installation + +* Docker images and scripts for building and testing linux packages. #511 #526 #555 #603 (@andyneff, @ssgelm) +* Create Windows GUI installer. #642 (@technoweenie) +* Binary releases use Go 1.5, which includes fix for Authorization when the + request URL includes just the username. [golang/go#11399](https://github.com/golang/go/issues/11399) + +### Misc + +* Documented Git config values used by Git LFS in [git-lfs-config(5)](https://github.com/git-lfs/git-lfs/blob/v0.6.0/docs/man/git-lfs-config.5.ronn). #610 (@sinbad) +* Experimental support for Git worktrees (in Git 2.5+) #546 (@sinbad) +* Experimental extension support. #486 (@ryansimmen) + +## v0.5.4 (30 July, 2015) + +* Ensure `git lfs uninit` cleans your git config thoroughly. #530 (@technoweenie) +* Fix issue with asking `git-credentials` for auth details after getting them +from the SSH command. 
#534 (@technoweenie) + +## v0.5.3 (23 July, 2015) + +* `git lfs fetch` bugs #429 (@rubyist) +* Push can crash on 32 bit architectures #450 (@rubyist) +* Improved SSH support #404, #464 (@sinbad, @technoweenie) +* Support 307 redirects with relative url #442 (@sinbad) +* Fix `init` issues when upgrading #446 #451 #452 #465 (@technoweenie, @rubyist) +* Support chunked Transfer-Encoding #386 (@ryansimmen) +* Fix issue with pushing deleted objects #461 (@technoweenie) +* Teach `git lfs push` how to send specific objects #449 (@larsxschneider) +* Update error message when attempting to push objects that don't exist in `.git/lfs/objects` #447 (@technoweenie) +* Fix bug in HTTP client when response body is nil #472 #488 (@rubyist, @technoweenie) +* `-crlf` flag in gitattributes is deprecated #475 (@technoweenie) +* Improvements to the CentOS and Debian build and package scripts (@andyneff, @ssgelm) + +## v0.5.2 (19 June, 2015) + +* Add `git lfs fetch` command for downloading objects. #285 (@rubyist) +* Fix `git lfs track` issues when run outside of a git repository #312, #323 (@michael-k, @Aorjoa) +* Fix `git lfs track` for paths with spaces in them #327 (@technoweenie) +* Fix `git lfs track` by writing relative paths to .gitattributes #356 (@michael-k) +* Fix `git lfs untrack` so it doesn't remove entries incorrectly from .gitattributes #398 (@michael-k) +* Fix `git lfs clean` bug with zero length files #346 (@technoweenie) +* Add `git lfs fsck` #373 (@zeroshirts, @michael-k) +* The Git pre-push warns if Git LFS is not installed #339 (@rubyist) +* Fix Content-Type header sent by the HTTP client #329 (@joerg) +* Improve performance tracing while scanning refs #311 (@michael-k) +* Fix detection of LocalGitDir and LocalWorkingDir #312 #354 #361 (@michael-k) +* Fix inconsistent file mode bits for directories created by Git LFS #364 (@michael-k) +* Optimize shell execs #377, #382, #391 (@bozaro) +* Collect HTTP transfer stats #366, #400 (@rubyist) +* Support GIT_DIR and GIT_WORK_TREE #370 (@michael-k) +* Hide Git application window in Windows #381 (@bozaro) +* Add support for configured URLs containing credentials per RFC1738 #408 (@ewbankkit, @technoweenie) +* Add experimental support for batch API calls #285 (@rubyist) +* Improve linux build instructions for CentOS and Debian. #299 #309 #313 #332 (@jsh, @ssgelm, @andyneff) + +## v0.5.1 (30 April, 2015) + +* Fix Windows install.bat script. #223 (@PeterDaveHello) +* Fix bug where `git lfs clean` will clean Git LFS pointers too #271 (@technoweenie) +* Better timeouts for the HTTP client #215 (@Mistobaan) +* Concurrent uploads through `git lfs push` #258 (@rubyist) +* Fix `git lfs smudge` behavior with zero-length file in `.git/lfs/objects` #267 (@technoweenie) +* Separate out pre-push hook behavior from `git lfs push` #263 (@technoweenie) +* Add diff/merge properties to .gitattributes #265 (@technoweenie) +* Respect `GIT_TERMINAL_PROMPT ` #257 (@technoweenie) +* Fix CLI progress bar output #185 (@technoweenie) +* Fail fast in `clean` and `smudge` commands when run without STDIN #264 (@technoweenie) +* Fix shell quoting in pre-push hook. #235 (@mhagger) +* Fix progress bar output during file uploads. #185 (@technoweenie) +* Change `remote.{name}.lfs_url` to `remote.{name}.lfsurl` #237 (@technoweenie) +* Swap `git config` order. 
#245 (@technoweenie) +* New `git lfs pointer` command for generating and comparing pointers #246 (@technoweenie) +* Follow optional "href" property from git-lfs-authenticate SSH command #247 (@technoweenie) +* `.git/lfs/objects` spec clarifications: #212 (@rtyley), #244 (@technoweenie) +* man page updates: #228 (@mhagger) +* pointer spec clarifications: #246 (@technoweenie) +* Code comments for the untrack command: #225 (@thekafkaf) + +## v0.5.0 (10 April, 2015) + +* Initial public release diff --git a/out/merged/git-lfs-3.2.0/README.md b/out/merged/git-lfs-3.2.0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..da34c6264dcf52995e5a1b6f49a34bb6c0cfdc85 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/README.md @@ -0,0 +1,259 @@ +# Git Large File Storage + +[![CI status][ci_badge]][ci_url] + +[ci_badge]: https://github.com/git-lfs/git-lfs/workflows/CI/badge.svg +[ci_url]: https://github.com/git-lfs/git-lfs/actions?query=workflow%3ACI + +[Git LFS](https://git-lfs.github.com) is a command line extension and +[specification](docs/spec.md) for managing large files with Git. + +The client is written in Go, with pre-compiled binaries available for Mac, +Windows, Linux, and FreeBSD. Check out the [website](http://git-lfs.github.com) +for an overview of features. + +## Getting Started + +### Downloading + +You can install the Git LFS client in several different ways, depending on your +setup and preferences. + +* **Linux users**. Debian and RPM packages are available from + [PackageCloud](https://packagecloud.io/github/git-lfs/install). +* **macOS users**. [Homebrew](https://brew.sh) bottles are distributed, and can + be installed via `brew install git-lfs`. +* **Windows users**. Git LFS is included in the distribution of + [Git for Windows](https://gitforwindows.org/). Alternatively, you can + install a recent version of Git LFS from the [Chocolatey](https://chocolatey.org/) package manager. +* **Binary packages**. In addition, [binary packages](https://github.com/git-lfs/git-lfs/releases) are +available for Linux, macOS, Windows, and FreeBSD. +* **Building from source**. [This repository](https://github.com/git-lfs/git-lfs.git) can also be +built from source using the latest version of [Go](https://golang.org), and the +available instructions in our +[Wiki](https://github.com/git-lfs/git-lfs/wiki/Installation#source). + +Note that Debian and RPM packages are built for all OSes for amd64 and i386. +For arm64, only Debian packages for the latest Debian release are built due to the cost of building in emulation. + +### Installing + +#### From binary + +The [binary packages](https://github.com/git-lfs/git-lfs/releases) include a script which will: + +- Install Git LFS binaries onto the system `$PATH` +- Run `git lfs install` to +perform required global configuration changes. + +```ShellSession +$ ./install.sh +``` + +#### From source + +- Ensure you have the latest version of Go, GNU make, and a standard Unix-compatible build environment installed. +- On Windows, install `goversioninfo` with `go install github.com/josephspurrier/goversioninfo/cmd/goversioninfo@latest`. +- Run `make`. +- Place the `git-lfs` binary, which can be found in `bin`, on your system’s executable `$PATH` or equivalent. +- Git LFS requires global configuration changes once per-machine. This can be done by +running: + +```ShellSession +$ git lfs install +``` + +#### Verifying releases + +Releases are signed with the OpenPGP key of one of the core team members. 
To +get these keys, you can run the following command, which will print them to +standard output: + +```ShellSession +$ curl -L https://api.github.com/repos/git-lfs/git-lfs/tarball/core-gpg-keys | tar -Ozxf - +``` + +Once you have the keys, you can download the `sha256sums.asc` file and verify +the file you want like so: + +```ShellSession +$ gpg -d sha256sums.asc | grep git-lfs-linux-amd64-v2.10.0.tar.gz | shasum -a 256 -c +``` + +For the convenience of distributors, we also provide a wider variety of signed +hashes in the `hashes.asc` file. Those hashes are in the tagged BSD format, but +can be verified with Perl's `shasum` or the GNU hash utilities, just like the +ones in `sha256sums.asc`. + +## Example Usage + +To begin using Git LFS within a Git repository that is not already configured +for Git LFS, you can indicate which files you would like Git LFS to manage. +This can be done by running the following _from within a Git repository_: + +```bash +$ git lfs track "*.psd" +``` + +(Where `*.psd` is the pattern of filenames that you wish to track. You can read +more about this pattern syntax +[here](https://git-scm.com/docs/gitattributes)). + +> *Note:* the quotation marks surrounding the pattern are important to +> prevent the glob pattern from being expanded by the shell. + +After any invocation of `git-lfs-track(1)` or `git-lfs-untrack(1)`, you _must +commit changes to your `.gitattributes` file_. This can be done by running: + +```bash +$ git add .gitattributes +$ git commit -m "track *.psd files using Git LFS" +``` + +You can now interact with your Git repository as usual, and Git LFS will take +care of managing your large files. For example, changing a file named `my.psd` +(tracked above via `*.psd`): + +```bash +$ git add my.psd +$ git commit -m "add psd" +``` + +> _Tip:_ if you have large files already in your repository's history, `git lfs +> track` will _not_ track them retroactively. To migrate existing large files +> in your history to use Git LFS, use `git lfs migrate`. For example: +> +> ``` +> $ git lfs migrate import --include="*.psd" --everything +> ``` +> +> **Note that this will rewrite history and change all of the Git object IDs in your +> repository, just like the export version of this command.** +> +> For more information, read [`git-lfs-migrate(1)`](https://github.com/git-lfs/git-lfs/blob/main/docs/man/git-lfs-migrate.1.ronn). + +You can confirm that Git LFS is managing your PSD file: + +```bash +$ git lfs ls-files +3c2f7aedfb * my.psd +``` + +Once you've made your commits, push your files to the Git remote: + +```bash +$ git push origin main +Uploading LFS objects: 100% (1/1), 810 B, 1.2 KB/s +# ... +To https://github.com/git-lfs/git-lfs-test + 67fcf6a..47b2002 main -> main +``` + +Note: Git LFS requires at least Git 1.8.2 on Linux or 1.8.5 on macOS. + +### Uninstalling + +If you've decided that Git LFS isn't right for you, you can convert your +repository back to a plain Git repository with `git lfs migrate` as well. For +example: + +```ShellSession +$ git lfs migrate export --include="*.psd" --everything +``` + +**Note that this will rewrite history and change all of the Git object IDs in your +repository, just like the import version of this command.** + +If there's some reason that things aren't working out for you, please let us +know in an issue, and we'll definitely try to help or get it fixed. 
+ +## Limitations + +Git LFS maintains a list of currently known limitations, which you can find and +edit [here](https://github.com/git-lfs/git-lfs/wiki/Limitations). + +Git LFS source code utilizes Go modules in its build system, and therefore this +project contains a `go.mod` file with a defined Go module path. However, we +do not maintain a stable Go language API or ABI, as Git LFS is intended to be +used solely as a compiled binary utility. Please do not import the `git-lfs` +module into other Go code and do not rely on it as a source code dependency. + +## Need Help? + +You can get help on specific commands directly: + +```bash +$ git lfs help +``` + +The [official documentation](docs) has command references and specifications for +the tool. There's also a [FAQ](https://github.com/git-lfs/git-lfs/wiki/FAQ) on +the wiki which answers some common questions. + +If you have a question on how to use Git LFS, aren't sure about something, or +are looking for input from others on tips about best practices or use cases, +feel free to +[start a discussion](https://github.com/git-lfs/git-lfs/discussions). + +You can always [open an issue](https://github.com/git-lfs/git-lfs/issues), and +one of the Core Team members will respond to you. Please be sure to include: + +1. The output of `git lfs env`, which displays helpful information about your + Git repository useful in debugging. +2. Any failed commands re-run with `GIT_TRACE=1` in the environment, which + displays additional information pertaining to why a command crashed. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for info on working on Git LFS and +sending patches. Related projects are listed on the [Implementations wiki +page](https://github.com/git-lfs/git-lfs/wiki/Implementations). + +See also [SECURITY.md](SECURITY.md) for info on how to submit reports +of security vulnerabilities. + +## Core Team + +These are the humans that form the Git LFS core team, which runs the project. + +In alphabetical order: + +| [@bk2204][bk2204-user] | [@chrisd8088][chrisd8088-user] | [@larsxschneider][larsxschneider-user] | +| :---: | :---: | :---: | +| [![][bk2204-img]][bk2204-user] | [![][chrisd8088-img]][chrisd8088-user] | [![][larsxschneider-img]][larsxschneider-user] | +| [PGP 0223B187][bk2204-pgp] | [PGP 088335A9][chrisd8088-pgp] | [PGP A5795889][larsxschneider-pgp] | + +[bk2204-img]: https://avatars1.githubusercontent.com/u/497054?s=100&v=4 +[chrisd8088-img]: https://avatars1.githubusercontent.com/u/28857117?s=100&v=4 +[larsxschneider-img]: https://avatars1.githubusercontent.com/u/477434?s=100&v=4 +[bk2204-user]: https://github.com/bk2204 +[chrisd8088-user]: https://github.com/chrisd8088 +[larsxschneider-user]: https://github.com/larsxschneider +[bk2204-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x88ace9b29196305ba9947552f1ba225c0223b187 +[chrisd8088-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x86cd3297749375bcf8206715f54fe648088335a9 +[larsxschneider-pgp]: https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xaa3b3450295830d2de6db90caba67be5a5795889 + +### Alumni + +These are the humans that have in the past formed the Git LFS core team, or +have otherwise contributed a significant amount to the project. Git LFS would +not be possible without them. 
+ +In alphabetical order: + +| [@andyneff][andyneff-user] | [@PastelMobileSuit][PastelMobileSuit-user] | [@rubyist][rubyist-user] | [@sinbad][sinbad-user] | [@technoweenie][technoweenie-user] | [@ttaylorr][ttaylorr-user] | +| :---: | :---: | :---: | :---: | :---: | :---: | +| [![][andyneff-img]][andyneff-user] | [![][PastelMobileSuit-img]][PastelMobileSuit-user] | [![][rubyist-img]][rubyist-user] | [![][sinbad-img]][sinbad-user] | [![][technoweenie-img]][technoweenie-user] | [![][ttaylorr-img]][ttaylorr-user] | + +[andyneff-img]: https://avatars1.githubusercontent.com/u/7596961?v=3&s=100 +[PastelMobileSuit-img]: https://avatars2.githubusercontent.com/u/37254014?s=100&v=4 +[rubyist-img]: https://avatars1.githubusercontent.com/u/143?v=3&s=100 +[sinbad-img]: https://avatars1.githubusercontent.com/u/142735?v=3&s=100 +[technoweenie-img]: https://avatars3.githubusercontent.com/u/21?v=3&s=100 +[ttaylorr-img]: https://avatars2.githubusercontent.com/u/443245?s=100&v=4 +[andyneff-user]: https://github.com/andyneff +[PastelMobileSuit-user]: https://github.com/PastelMobileSuit +[sinbad-user]: https://github.com/sinbad +[rubyist-user]: https://github.com/rubyist +[technoweenie-user]: https://github.com/technoweenie +[ttaylorr-user]: https://github.com/ttaylorr diff --git a/out/merged/git-lfs-3.2.0/git-lfs b/out/merged/git-lfs-3.2.0/git-lfs new file mode 100644 index 0000000000000000000000000000000000000000..96092eb2c4ad5c0737aa541dff7d352baa28fe0c --- /dev/null +++ b/out/merged/git-lfs-3.2.0/git-lfs @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05309d4bc952e77251de76d8dda6bfbb9e9f532757abfc41a454f0e8327d7e33 +size 10768384 diff --git a/out/merged/git-lfs-3.2.0/install.sh b/out/merged/git-lfs-3.2.0/install.sh new file mode 100644 index 0000000000000000000000000000000000000000..28e45f1b6f474e039cf05700e3fd43c85876dc15 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/install.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +set -eu + +prefix="/usr/local" + +if [ "${PREFIX:-}" != "" ] ; then + prefix=${PREFIX:-} +elif [ "${BOXEN_HOME:-}" != "" ] ; then + prefix=${BOXEN_HOME:-} +fi + +mkdir -p $prefix/bin +rm -rf $prefix/bin/git-lfs* + +pushd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null + for g in git*; do + install $g "$prefix/bin/$g" + done +popd > /dev/null + +PATH+=:$prefix/bin +git lfs install diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html new file mode 100644 index 0000000000000000000000000000000000000000..9455cb3c1789d55ecfd8f6b132266540e4f3e2b3 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-checkout.1.html @@ -0,0 +1,178 @@ + + + + + + git-lfs-checkout(1) - Update working copy with file content if available + + + + +
NAME

git-lfs-checkout - Update working copy with file content if available

SYNOPSIS

git lfs checkout [glob-pattern...]
git lfs checkout --to file {--base|--ours|--theirs} conflict-obj-path

DESCRIPTION

Try to ensure that the working copy contains file content for Git LFS objects
for the current ref, if the object data is available. Does not download any
content; see git-lfs-fetch(1) for that.

Checkout scans the current ref for all LFS objects that would be required, then
where a file is either missing in the working copy, or contains placeholder
pointer content with the same SHA, the real file content is written, provided
we have it in the local store. Modified files are never overwritten.

One or more glob-patterns may be provided as arguments to restrict the set of
files that are updated. Glob patterns are matched as per the format described
in gitignore(5).

When used with --to and the working tree is in a conflicted state due to a
merge, this option checks out one of the three stages of a conflicting Git LFS
object into a separate file (which can be outside of the work tree). This can
make using diff tools to inspect and resolve merges easier. A single Git LFS
object's file path must be provided in conflict-obj-path.

OPTIONS

--base
    Check out the merge base of the specified file.

--ours
    Check out our side (that of the current branch) of the conflict for the
    specified file.

--theirs
    Check out their side (that of the other branch) of the conflict for the
    specified file.

--to path
    If the working tree is in a conflicted state, check out the portion of the
    conflict specified by --base, --ours, or --theirs to the given path.

EXAMPLES

Checkout all files that are missing or placeholders:

    $ git lfs checkout

Checkout a specific couple of files:

    $ git lfs checkout path/to/file1.png path/to/file2.png

Checkout a path with a merge conflict into separate files:

    # Attempt merge with a branch that has a merge conflict
    $ git merge conflicting-branch
    CONFLICT (content): Merge conflict in path/to/conflicting/file.dat

    # Checkout versions of the conflicting file into temp files
    $ git lfs checkout ours.dat --ours path/to/conflicting/file.dat
    $ git lfs checkout theirs.dat --theirs path/to/conflicting/file.dat

    # Compare conflicting versions in ours.dat and theirs.dat,
    # then resolve the conflict (e.g., by choosing one version over
    # the other, or creating a new version)

    # Cleanup and continue with merge
    $ rm ours.dat theirs.dat
    $ git add path/to/conflicting/file.dat
    $ git merge --continue

SEE ALSO

git-lfs-fetch(1), git-lfs-pull(1), gitignore(5).

Part of the git-lfs(1) suite.
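Not part of the original page: a brief sketch of extracting all three stages of
a conflicted Git LFS file with the `--to` form from the synopsis above, so they
can be fed to an external diff tool. The stage file names and the conflicting
path are illustrative only.

```ShellSession
# Illustrative paths; --to writes the requested stage to a separate file
$ git lfs checkout --to base.dat   --base   path/to/conflicting/file.dat
$ git lfs checkout --to ours.dat   --ours   path/to/conflicting/file.dat
$ git lfs checkout --to theirs.dat --theirs path/to/conflicting/file.dat
```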
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-clean.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-clean.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c0ec25731e3e4155c968bb513db624acf78f06e4 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-clean.1.html @@ -0,0 +1,104 @@ + + + + + + git-lfs-clean(1) - Git clean filter that converts large files to pointers + + + + +
NAME

git-lfs-clean - Git clean filter that converts large files to pointers

SYNOPSIS

git lfs clean path

DESCRIPTION

Read the contents of a large file from standard input, and write a Git LFS
pointer file for that file to standard output.

Clean is typically run by Git's clean filter, configured by the repository's
Git attributes.

Clean is not part of the user-facing Git plumbing commands. To preview the
pointer of a large file as it would be generated, see the git-lfs-pointer(1)
command.

SEE ALSO

git-lfs-install(1), git-lfs-push(1), git-lfs-pointer(1), gitattributes(5).

Part of the git-lfs(1) suite.
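As a rough illustration of the relationship between the clean filter and
pointer generation (not part of the original page; the file name is made up),
the pointer that would be stored for a file can be previewed like this:

```ShellSession
# Preview the pointer for a file without committing anything
$ git lfs pointer --file=my.psd
# Roughly equivalent plumbing-level call: clean reads the content on standard
# input and writes the pointer to standard output
$ git lfs clean my.psd < my.psd
```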
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-clone.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-clone.1.html new file mode 100644 index 0000000000000000000000000000000000000000..8a59b2146cb55656c21fcbf23e6431a8478d9505 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-clone.1.html @@ -0,0 +1,145 @@ + + + + + + git-lfs-clone(1) - Efficiently clone a LFS-enabled repository + + + + +
NAME

git-lfs-clone - Efficiently clone a LFS-enabled repository

SYNOPSIS

git lfs clone [git clone options] repository [directory]

DESCRIPTION

Clone an LFS enabled Git repository more efficiently by disabling LFS during
the git clone, then performing a 'git lfs pull' directly afterwards.

'git lfs clone' also installs all of the repo-level hooks (.git/hooks) that LFS
requires to operate. If --separate-git-dir is given, the hooks will be
installed there.

This is faster than a regular 'git clone' because that will download LFS
content using the smudge filter, which is executed individually per file in the
working copy. This is relatively inefficient compared to the batch mode and
parallel downloads performed by 'git lfs pull'.

OPTIONS

All options supported by 'git clone'

-I paths / --include=paths
    See INCLUDE AND EXCLUDE

-X paths / --exclude=paths
    See INCLUDE AND EXCLUDE

--skip-repo
    Skip installing repo-level hooks (.git/hooks) that LFS requires. Disabled
    by default.

INCLUDE AND EXCLUDE

You can configure Git LFS to only fetch objects to satisfy references in
certain paths of the repo, and/or to exclude certain paths of the repo, to
reduce the time you spend downloading things you do not use.

In your Git configuration or in a .lfsconfig file, you may set either or both
of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of paths. If
lfs.fetchinclude is defined, Git LFS objects will only be fetched if their path
matches one in that list, and if lfs.fetchexclude is defined, Git LFS objects
will only be fetched if their path does not match one in that list. Paths are
matched using wildcard matching as per gitignore(5).

Note that using the command-line options -I and -X override the respective
configuration settings. Setting either option to an empty string clears the
value.

SEE ALSO

git-clone(1), git-lfs-pull(1), gitignore(5).

Part of the git-lfs(1) suite.
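A short, hypothetical example of the include/exclude flags described above; the
repository URL and the paths are placeholders:

```ShellSession
$ git lfs clone --include="textures,images" --exclude="media/reallybigfiles" \
    https://github.com/example/repo.git
```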
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-config.5.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-config.5.html new file mode 100644 index 0000000000000000000000000000000000000000..5c76e9666143b5f50b4f99fb0979bd845fe87e30 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-config.5.html @@ -0,0 +1,604 @@ + + + + + + git-lfs-config(5) - Configuration options for git-lfs + + + + +
NAME

git-lfs-config - Configuration options for git-lfs

CONFIGURATION FILES

git-lfs reads its configuration from any file supported by git config -l,
including all per-repository, per-user, and per-system Git configuration files.

Additionally, a small number of settings can be specified in a file called
.lfsconfig at the root of the repository; see the "LFSCONFIG" section for more
details. This configuration file is useful for setting options such as the LFS
URL or access type for all users of a repository, especially when these differ
from the default. The .lfsconfig file uses the same format as .gitconfig.

If the .lfsconfig file is missing, the index is checked for a version of the
file, and that is used instead. If both are missing, HEAD is checked for the
file. If the repository is bare, only HEAD is checked. This order may change
for checkouts in the future to better match Git's behavior.

Settings from Git configuration files override the .lfsconfig file. This allows
you to override settings like lfs.url in your local environment without having
to modify the .lfsconfig file.

Most options regarding git-lfs are contained in the [lfs] section, meaning they
are all named lfs.foo or similar, although occasionally an lfs option can be
scoped inside the configuration for a remote.

LIST OF OPTIONS

General settings

lfs.url / remote.<remote>.lfsurl
    The url used to call the Git LFS remote API. Default blank (derive from
    clone URL).

lfs.pushurl / remote.<remote>.lfspushurl
    The url used to call the Git LFS remote API when pushing. Default blank
    (derive from either LFS non-push urls or clone url).

remote.lfsdefault
    The remote used to find the Git LFS remote API. lfs.url and branch.*.remote
    for the current branch override this setting. If this setting is not
    specified and there is exactly one remote, that remote is picked;
    otherwise, the default is origin.

remote.lfspushdefault
    The remote used to find the Git LFS remote API when pushing. lfs.url and
    branch.*.pushremote for the current branch override this setting. If this
    setting is not set, remote.pushdefault is used, or if that is not set, the
    order of selection is used as specified in remote.lfsdefault above.

lfs.dialtimeout
    Sets the maximum time, in seconds, that the HTTP client will wait to
    initiate a connection. This does not include the time to send a request and
    wait for a response. Default: 30 seconds.

lfs.tlstimeout
    Sets the maximum time, in seconds, that the HTTP client will wait for a TLS
    handshake. Default: 30 seconds.

lfs.activitytimeout / lfs.https://<host>.activitytimeout
    Sets the maximum time, in seconds, that the HTTP client will wait for the
    next tcp read or write. If < 1, no activity timeout is used at all.
    Default: 30 seconds.

lfs.keepalive
    Sets the maximum time, in seconds, for the HTTP client to maintain
    keepalive connections. Default: 30 minutes.

lfs.ssh.automultiplex
    When using the pure SSH-based protocol, whether to multiplex requests over
    a single connection when possible. This option requires the use of OpenSSH
    or a compatible SSH client. Default: true.

lfs.ssh.retries
    Specifies the number of times Git LFS will attempt to obtain authorization
    via SSH before aborting. Default: 5.

core.askpass, GIT_ASKPASS
    Given as a program and its arguments, this is invoked when authentication
    is needed against the LFS API. The contents of stdout are interpreted as
    the password.

lfs.cachecredentials
    Enables in-memory SSH and Git Credential caching for a single 'git lfs'
    command. Default: enabled.

lfs.storage
    Allows overriding the LFS storage directory. A non-absolute path is
    relativized to the inside of the Git repository directory (usually .git).
    Note: you should not run git lfs prune if you have different repositories
    sharing the same storage directory. Default: lfs in the Git repository
    directory (usually .git/lfs).

lfs.largefilewarning
    Warn when a file is 4 GiB or larger. Such files will be corrupted when
    using Windows (unless smudging is disabled) with a Git for Windows version
    less than 2.34.0 due to a limitation in Git. Default: true if the version
    is less than 2.34.0, false otherwise.

Upload and download transfer settings

These settings control how the upload and download of LFS content occurs.

lfs.concurrenttransfers
    The number of concurrent uploads/downloads. Default 8.

lfs.basictransfersonly
    If set to true, only basic HTTP upload/download transfers will be used,
    ignoring any more advanced transfers that the client/server may support.
    This is primarily to work around bugs or incompatibilities.
    The git-lfs client supports basic HTTP downloads, resumable HTTP downloads
    (using Range headers), and resumable uploads via the tus.io protocol.
    Custom transfer methods can be added via lfs.customtransfer (see next
    section). However setting this value to true limits the client to simple
    HTTP.

lfs.tustransfers
    If set to true, this enables resumable uploads of LFS objects through the
    tus.io API. Once this feature is finalized, this setting will be removed,
    and tus.io uploads will be available for all clients.

lfs.standalonetransferagent
    Allows the specified custom transfer agent to be used directly for
    transferring files, without asking the server how the transfers should be
    made. The custom transfer agent has to be defined in a
    lfs.customtransfer.<name> settings group.

lfs.customtransfer.<name>.path
    lfs.customtransfer.<name> is a settings group which defines a custom
    transfer hook which allows you to upload/download via an intermediate
    process, using any mechanism you like (rather than just HTTP). path should
    point to the process you wish to invoke. The protocol between the git-lfs
    client and the custom transfer process is documented at
    https://github.com/git-lfs/git-lfs/blob/main/docs/custom-transfers.md
    name must be a unique identifier that the LFS server understands. When
    calling the LFS API the client will include a list of supported transfer
    types. If the server also supports this named transfer type, it will select
    it and actions returned from the API will be in relation to that transfer
    type (may not be traditional URLs for example). Only if the server accepts
    name as a transfer it supports will this custom transfer process be
    invoked.

lfs.customtransfer.<name>.args
    If the custom transfer process requires any arguments, these can be
    provided here. This string will be expanded by the shell.

lfs.customtransfer.<name>.concurrent
    If true (the default), git-lfs will invoke the custom transfer process
    multiple times in parallel, according to lfs.concurrenttransfers, splitting
    the transfer workload between the processes.

lfs.customtransfer.<name>.direction
    Specifies which direction the custom transfer process supports, either
    "download", "upload", or "both". The default if unspecified is "both".

lfs.transfer.maxretries
    Specifies how many retries LFS will attempt per OID before marking the
    transfer as failed. Must be an integer which is at least one. If the value
    is not an integer, is less than one, or is not given, a value of eight will
    be used instead.

lfs.transfer.maxretrydelay
    Specifies the maximum time in seconds LFS will wait between each retry
    attempt. LFS uses exponential backoff for retries, doubling the time
    between each retry until reaching this limit. If a server requests a delay
    using the Retry-After header, the header value overrides the exponential
    delay for that attempt and is not limited by this option.
    Must be an integer which is not negative. Use zero to disable delays
    between retries unless requested by a server. If the value is not an
    integer, is negative, or is not given, a value of ten will be used instead.

lfs.transfer.maxverifies
    Specifies how many verification requests LFS will attempt per OID before
    marking the transfer as failed, if the object has a verification action
    associated with it. Must be an integer which is at least one. If the value
    is not an integer, is less than one, or is not given, a default value of
    three will be used instead.

lfs.transfer.enablehrefrewrite
    If set to true, this enables rewriting the href of LFS objects using
    url.*.insteadof/pushinsteadof config. pushinsteadof is used only for
    uploading, and insteadof is used for downloading and for uploading when
    pushinsteadof is not set.

Push settings

lfs.allowincompletepush
    When pushing, allow objects to be missing from the local cache without
    halting a Git push. Default: false.

Fetch settings

lfs.fetchinclude
    When fetching, only download objects which match any entry on this
    comma-separated list of paths/filenames. Wildcard matching is as per
    gitignore(5). See git-lfs-fetch(1) for examples.

lfs.fetchexclude
    When fetching, do not download objects which match any item on this
    comma-separated list of paths/filenames. Wildcard matching is as per
    gitignore(5). See git-lfs-fetch(1) for examples.

lfs.fetchrecentrefsdays
    If non-zero, fetches refs which have commits within N days of the current
    date. Only local refs are included unless lfs.fetchrecentremoterefs is
    true. Also used as a basis for pruning old files. The default is 7 days.

lfs.fetchrecentremoterefs
    If true, fetches remote refs (for the remote you're fetching) as well as
    local refs in the recent window. This is useful to fetch objects for remote
    branches you might want to check out later. The default is true; if you set
    this to false, fetching for those branches will only occur when you either
    check them out (losing the advantage of fetch --recent), or create a
    tracking local branch separately then fetch again.

lfs.fetchrecentcommitsdays
    In addition to fetching at refs, also fetches previous changes made within
    N days of the latest commit on the ref. This is useful if you're often
    reviewing recent changes. Also used as a basis for pruning old files. The
    default is 0 (no previous changes).

lfs.fetchrecentalways
    Always operate as if --recent was included in a git lfs fetch call.
    Default false.

Prune settings

lfs.pruneoffsetdays
    The number of days added to the lfs.fetchrecent* settings to determine what
    can be pruned. Default is 3 days, i.e. anything fetched at the very oldest
    edge of the 'recent window' is eligible for pruning 3 days later.

lfs.pruneremotetocheck
    Set the remote that LFS files must have been pushed to in order for them to
    be considered eligible for local pruning. Also the remote which is called
    if --verify-remote is enabled.

lfs.pruneverifyremotealways
    Always run git lfs prune as if --verify-remote was provided.

Extensions

lfs.extension.<name>.<setting>
    Git LFS extensions enable the manipulation of file streams during smudge
    and clean. name groups the settings for a single extension, and the
    settings are:
    clean - the command which runs when files are added to the index
    smudge - the command which runs when files are written to the working copy
    priority - the order of this extension compared to others

Other settings

lfs.<url>.access
    Note: this setting is normally set by LFS itself on receiving a 401
    response (authentication required); you don't normally need to set it
    manually. If set to "basic" then credentials will be requested before
    making batch requests to this url, otherwise a public request will
    initially be attempted.

lfs.<url>.locksverify
    Determines whether locks are checked before Git pushes. This prevents you
    from pushing changes to files that other users have locked. The Git LFS
    pre-push hook varies its behavior based on the value of this config key.
    null - In the absence of a value, Git LFS will attempt the call, and warn
    if it returns an error. If the response is valid, Git LFS will set the
    value to true, and will halt the push if the user attempts to update a
    file locked by another user. If the server returns a 501 Not Implemented
    response, Git LFS will set the value to false.
    true - Git LFS will attempt to verify locks, halting the Git push if there
    are any server issues, or if the user attempts to update a file locked by
    another user.
    false - Git LFS will completely skip the lock check in the pre-push hook.
    You should set this if you're not using File Locking, or your Git server
    verifies locked files on pushes automatically.
    Supports URL config lookup as described in
    https://git-scm.com/docs/git-config#git-config-httplturlgt. To set this
    value per-host: git config --global lfs.https://github.com/.locksverify
    [true|false].

lfs.<url>.contenttype
    Determines whether Git LFS should attempt to detect an appropriate HTTP
    Content-Type header when uploading using the 'basic' upload adapter. If set
    to false, the default header of Content-Type: application/octet-stream is
    chosen instead. Default: 'true'.

lfs.skipdownloaderrors
    Causes Git LFS not to abort the smudge filter when a download error is
    encountered, which allows actions such as checkout to work when you are
    unable to download the LFS content. LFS files which could not download will
    contain pointer content instead.
    Note that this will result in git commands which call the smudge filter to
    report success even in cases when LFS downloads fail, which may affect
    scripts.
    You can also set the environment variable GIT_LFS_SKIP_DOWNLOAD_ERRORS=1 to
    get the same effect.

GIT_LFS_PROGRESS
    This environment variable causes Git LFS to emit progress updates to an
    absolute file-path on disk when cleaning, smudging, or fetching.
    Progress is reported periodically in the form of a new line being appended
    to the end of the file. Each new line will take the following format:
    <direction> <current>/<total files> <downloaded>/<total> <name>
    Each field is described below:
    direction - the direction of transfer, either "checkout", "download", or
    "upload"
    current - the index of the currently transferring file
    total files - the estimated count of all files to be transferred
    downloaded - the number of bytes already downloaded
    total - the entire size of the file, in bytes
    name - the name of the file

GIT_LFS_FORCE_PROGRESS / lfs.forceprogress
    Controls whether Git LFS will suppress progress status when the standard
    output stream is not attached to a terminal. The default is false, which
    makes Git LFS detect whether stdout is a terminal and suppress progress
    when it's not; you can disable this behaviour and force progress status
    even when the standard output stream is not a terminal by setting either
    variable to 1, 'yes' or 'true'.

GIT_LFS_SKIP_SMUDGE
    Sets whether or not Git LFS will skip attempting to convert pointers of
    tracked files into their corresponding objects when checked out into a
    working copy. If 'true', '1', 'on', or similar, Git LFS will skip the
    smudge process in both git lfs smudge and git lfs filter-process. If unset,
    or set to 'false', '0', 'off', or similar, Git LFS will smudge files as
    normal.

GIT_LFS_SKIP_PUSH
    Sets whether or not Git LFS will attempt to upload new Git LFS objects in a
    pre-push hook. If 'true', '1', 'on', or similar, Git LFS will skip the
    pre-push hook, so no new Git LFS objects will be uploaded. If unset, or set
    to 'false', '0', 'off', or similar, Git LFS will proceed as normal.

GIT_LFS_SET_LOCKABLE_READONLY / lfs.setlockablereadonly
    These settings, the first an environment variable and the second a
    gitconfig setting, control whether files marked as 'lockable' in git lfs
    track are made read-only in the working copy when not locked by the current
    user. The default is true; you can disable this behaviour and have all
    files writeable by setting either variable to 0, 'no' or 'false'.

lfs.lockignoredfiles
    This setting controls whether Git LFS will set ignored files that match the
    lockable pattern read only as well as tracked files. The default is false;
    you can enable this behavior by setting the variable to 1, 'yes', or
    'true'.

lfs.defaulttokenttl
    This setting sets a default token TTL when git-lfs-authenticate does not
    include the TTL in the JSON response but still enforces it.
    Note that this is only necessary for larger repositories hosted on LFS
    servers that don't include the TTL.

LFSCONFIG

The .lfsconfig file in a repository is read and interpreted in the same format
as the file stored in .git/config. It allows a subset of keys to be used,
including and limited to:

lfs.allowincompletepush
lfs.fetchexclude
lfs.fetchinclude
lfs.gitprotocol
lfs.locksverify
lfs.pushurl
lfs.skipdownloaderrors
lfs.url
lfs.{*}.access
remote.{name}.lfsurl

The set of keys allowed in this file is restricted for security reasons.

EXAMPLES

Configure a custom LFS endpoint for your repository:

    git config -f .lfsconfig lfs.url https://lfs.example.com/foo/bar/info/lfs

SEE ALSO

git-config(1), git-lfs-install(1), gitattributes(5), gitignore(5).

Part of the git-lfs(1) suite.
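To tie a few of the keys above together, here is a hedged sketch of a
per-repository setup; the paths are placeholders, and which keys may live in
.lfsconfig versus plain Git config follows the LFSCONFIG list above:

```ShellSession
# Shared defaults, committed with the repository in .lfsconfig
$ git config -f .lfsconfig lfs.fetchinclude "textures,images"
$ git config -f .lfsconfig lfs.fetchexclude "media/reallybigfiles"
# Personal override in the local Git config; Git config wins over .lfsconfig
$ git config lfs.concurrenttransfers 16
```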
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html new file mode 100644 index 0000000000000000000000000000000000000000..fb5ecb47799b9fa3c44d04b6cd21f7d11a53a365 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-dedup.1.html @@ -0,0 +1,102 @@ + + + + + + git-lfs-dedup(1) - Deduplicate Git LFS files + + + + +
NAME

git-lfs-dedup - Deduplicate Git LFS files

SYNOPSIS

git lfs dedup

DESCRIPTION

Deduplicates storage by re-creating working tree files as clones of the files
in the Git LFS storage directory using the operating system's copy-on-write
file creation functionality.

If the operating system or file system don't support copy-on-write file
creation, this command exits unsuccessfully.

This command will also exit without success if any Git LFS extensions are
configured, as these will typically be used to alter the file contents before
they are written to the Git LFS storage directory, and therefore the working
tree files should not be copy-on-write clones of the LFS object files.

SEE ALSO

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-env.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-env.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b4d9d07f040fcbf25e3e532a3522b82227a9388f --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-env.1.html @@ -0,0 +1,94 @@ + + + + + + git-lfs-env(1) - Display the Git LFS environment + + + + +
NAME

git-lfs-env - Display the Git LFS environment

SYNOPSIS

git lfs env

DESCRIPTION

Display the current Git LFS environment.

SEE ALSO

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-ext.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-ext.1.html new file mode 100644 index 0000000000000000000000000000000000000000..24c4771ea9ad126f126493ea36030b93f85518f3 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-ext.1.html @@ -0,0 +1,112 @@ + + + + + + git-lfs-ext(1) - View extension details + + + + +
NAME

git-lfs-ext - View extension details

SYNOPSIS

git lfs ext list [name...]

DESCRIPTION

Git LFS extensions enable the manipulation of file streams during smudge and
clean.

EXAMPLES

List details for all extensions:

    git lfs ext
    git lfs ext list

List details for the specified extensions:

    git lfs ext list 'foo' 'bar'

SEE ALSO

Part of the git-lfs(1) suite.
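For context, extensions are declared through the lfs.extension.<name>.* keys
covered in git-lfs-config(5). A hypothetical sketch follows; the "foo"
extension, its commands, and the %f placeholder are illustrative assumptions,
not real tools:

```ShellSession
$ git config lfs.extension.foo.clean "foo-clean %f"
$ git config lfs.extension.foo.smudge "foo-smudge %f"
$ git config lfs.extension.foo.priority 0
$ git lfs ext list foo
```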
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html new file mode 100644 index 0000000000000000000000000000000000000000..1d9b0b3952213a44fb442f96bb3c253f5c067f91 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-fetch.1.html @@ -0,0 +1,268 @@ + + + + + + git-lfs-fetch(1) - Download all Git LFS files for a given ref + + + + +
NAME

git-lfs-fetch - Download all Git LFS files for a given ref

SYNOPSIS

git lfs fetch [options] [remote [ref...]]

DESCRIPTION

Download Git LFS objects at the given refs from the specified remote. See
DEFAULT REMOTE and DEFAULT REFS for what happens if you don't specify.

This does not update the working copy.

OPTIONS

-I paths / --include=paths
    Specify lfs.fetchinclude just for this invocation; see INCLUDE AND EXCLUDE.

-X paths / --exclude=paths
    Specify lfs.fetchexclude just for this invocation; see INCLUDE AND EXCLUDE.

--recent
    Download objects referenced by recent branches & commits in addition to
    those that would otherwise be downloaded. See RECENT CHANGES.

--all
    Download all objects that are referenced by any commit reachable from the
    refs provided as arguments. If no refs are provided, then all refs are
    fetched. This is primarily for backup and migration purposes. Cannot be
    combined with --recent or --include/--exclude. Ignores any globally
    configured include and exclude paths to ensure that all objects are
    downloaded.

--prune / -p
    Prune old and unreferenced objects after fetching, equivalent to running
    git lfs prune afterwards. See git-lfs-prune(1) for more details.

INCLUDE AND EXCLUDE

You can configure Git LFS to only fetch objects to satisfy references in
certain paths of the repo, and/or to exclude certain paths of the repo, to
reduce the time you spend downloading things you do not use.

In your Git configuration or in a .lfsconfig file, you may set either or both
of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of paths. If
lfs.fetchinclude is defined, Git LFS objects will only be fetched if their path
matches one in that list, and if lfs.fetchexclude is defined, Git LFS objects
will only be fetched if their path does not match one in that list. Paths are
matched using wildcard matching as per gitignore(5).

Note that using the command-line options -I and -X override the respective
configuration settings. Setting either option to an empty string clears the
value.

Examples

git config lfs.fetchinclude "textures,images/foo*"
    This will only fetch objects referenced in paths in the textures folder,
    and files called foo* in the images folder.

git config lfs.fetchinclude "*.jpg,*.png,*.tga"
    Only fetch JPG/PNG/TGA files, wherever they are in the repository.

git config lfs.fetchexclude "media/reallybigfiles"
    Don't fetch any LFS objects referenced in the folder media/reallybigfiles,
    but fetch everything else.

git config lfs.fetchinclude "media"
git config lfs.fetchexclude "media/excessive"
    Only fetch LFS objects in the 'media' folder, but exclude those in one of
    its subfolders.

DEFAULT REMOTE

Without arguments, fetch downloads from the default remote. The default remote
is the same as for git fetch, i.e. based on the remote branch you're tracking
first, or origin otherwise.

DEFAULT REFS

If no refs are given as arguments, the currently checked out ref is used. In
addition, if enabled, recently changed refs and commits are also included. See
RECENT CHANGES for details.

RECENT CHANGES

If the --recent option is specified, or if the gitconfig option
lfs.fetchrecentalways is true, then after the current ref (or those in the
arguments) is fetched, we also search for 'recent' changes to fetch objects
for, so that it's more convenient to checkout or diff those commits without
incurring further downloads.

What changes are considered 'recent' is based on a number of gitconfig options:

lfs.fetchrecentrefsdays
    If non-zero, includes branches which have commits within N days of the
    current date. Only local refs are included unless lfs.fetchrecentremoterefs
    is true. The default is 7 days.

lfs.fetchrecentremoterefs
    If true, fetches remote refs (for the remote you're fetching) as well as
    local refs in the recent window. This is useful to fetch objects for remote
    branches you might want to check out later. The default is true; if you set
    this to false, fetching for those branches will only occur when you either
    check them out (losing the advantage of fetch --recent), or create a
    tracking local branch separately then fetch again.

lfs.fetchrecentcommitsdays
    In addition to fetching at branches, also fetches changes made within N
    days of the latest commit on the branch. This is useful if you're often
    reviewing recent changes. The default is 0 (no previous changes).

lfs.fetchrecentalways
    Always operate as if --recent was provided on the command line.

EXAMPLES

Fetch the LFS objects for the current ref from default remote:

    git lfs fetch

Fetch the LFS objects for the current ref AND recent changes from default
remote:

    git lfs fetch --recent

Fetch the LFS objects for the current ref from a secondary remote 'upstream':

    git lfs fetch upstream

Fetch all the LFS objects from the default remote that are referenced by any
commit in the main and develop branches:

    git lfs fetch --all origin main develop

Fetch the LFS objects for a branch from origin:

    git lfs fetch origin mybranch

Fetch the LFS objects for 2 branches and a commit from origin:

    git lfs fetch origin main mybranch e445b45c1c9c6282614f201b62778e4c0688b5c8

SEE ALSO

git-lfs-checkout(1), git-lfs-pull(1), git-lfs-prune(1), gitconfig(5).

Part of the git-lfs(1) suite.
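Since fetch never touches the working copy, a common follow-up is an explicit
checkout; a small sketch, which for the current ref roughly approximates what
git lfs pull does in one step:

```ShellSession
$ git lfs fetch --recent
$ git lfs checkout
```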
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ba44d9098294ecd99832d155f94d076ff325c896 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-filter-process.1.html @@ -0,0 +1,127 @@ + + + + + + git-lfs-filter-process(1) - Git filter process that converts between pointer and actual content + + + + +
NAME

git-lfs-filter-process - Git filter process that converts between pointer and
actual content

SYNOPSIS

git lfs filter-process
git lfs filter-process --skip

DESCRIPTION

Implement the Git process filter API, exchanging handshake messages and then
accepting and responding to requests to either clean or smudge a file.

filter-process is always run by Git's filter process, and is configured by the
repository's Git attributes.

In your Git configuration or in a .lfsconfig file, you may set either or both
of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of paths. If
lfs.fetchinclude is defined, Git LFS pointer files will only be replaced with
the contents of the corresponding Git LFS object file if their path matches one
in that list, and if lfs.fetchexclude is defined, Git LFS pointer files will
only be replaced with the contents of the corresponding Git LFS object file if
their path does not match one in that list. Paths are matched using wildcard
matching as per gitignore(5). Git LFS pointer files that are not replaced with
the contents of their corresponding object files are simply copied to standard
output without change.

The filter process uses Git's pkt-line protocol to communicate, and is
documented in detail in gitattributes(5).

OPTIONS

Without any options, filter-process accepts and responds to requests normally.

--skip
    Skip automatic downloading of objects on clone or pull.

GIT_LFS_SKIP_SMUDGE
    Disables the smudging process. For more, see: git-lfs-config(5).

SEE ALSO

git-lfs-clean(1), git-lfs-install(1), git-lfs-smudge(1), gitattributes(5),
gitignore(5).

Part of the git-lfs(1) suite.
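For orientation (not part of the original page), the filter is normally wired
up by git lfs install; a sketch of the kind of configuration involved, with
.gitattributes deciding which paths go through it — the *.psd pattern is only
an example:

```ShellSession
$ git config --global filter.lfs.process "git-lfs filter-process"
$ git config --global filter.lfs.required true
$ cat .gitattributes
*.psd filter=lfs diff=lfs merge=lfs -text
```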
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b0954e5e180f3f55d22a73036aadbf78c631b23b --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-fsck.1.html @@ -0,0 +1,123 @@ + + + + + + git-lfs-fsck(1) - Check GIT LFS files for consistency + + + + +
NAME

git-lfs-fsck - Check GIT LFS files for consistency

SYNOPSIS

git lfs fsck [options] [revisions]

DESCRIPTION

Checks all Git LFS files in the current HEAD for consistency.

Corrupted files are moved to ".git/lfs/bad".

The revisions may be specified as either a single committish, in which case
only that commit is inspected; specified as a range of the form A..B (and only
this form), in which case that range is inspected; or omitted entirely, in
which case HEAD (and, for --objects, the index) is examined.

The default is to perform all checks.

In your Git configuration or in a .lfsconfig file, you may set lfs.fetchexclude
to a comma-separated list of paths. If lfs.fetchexclude is defined, then any
Git LFS files whose paths match one in that list will not be checked for
consistency. Paths are matched using wildcard matching as per gitignore(5).

OPTIONS

--objects
    Check that each object in HEAD matches its expected hash and that each
    object exists on disk.

--pointers
    Check that each pointer is canonical and that each file which should be
    stored as a Git LFS file is so stored.

SEE ALSO

git-lfs-ls-files(1), git-lfs-status(1), gitignore(5).

Part of the git-lfs(1) suite.
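A couple of hedged invocations of the options above; the revision range is
illustrative only:

```ShellSession
# Full consistency check of the current HEAD
$ git lfs fsck
# Only verify pointer canonicality over a specific range
$ git lfs fsck --pointers HEAD~10..HEAD
```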
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-install.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-install.1.html new file mode 100644 index 0000000000000000000000000000000000000000..9b27bf18928bff7c503ee0936710712033f169e3 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-install.1.html @@ -0,0 +1,143 @@ + + + + + + git-lfs-install(1) - Install Git LFS configuration. + + + + +
NAME

git-lfs-install - Install Git LFS configuration.

SYNOPSIS

git lfs install options

DESCRIPTION

Perform the following actions to ensure that Git LFS is set up properly:

* Set up the clean and smudge filters under the name "lfs" in the global Git
  config.
* Install a pre-push hook to run git-lfs-pre-push(1) for the current
  repository, if run from inside one. If "core.hooksPath" is configured in any
  Git configuration (and supported, i.e., the installed Git version is at least
  2.9.0), then the pre-push hook will be installed to that directory instead.

OPTIONS

Without any options, git lfs install will only set up the "lfs" smudge and
clean filters if they are not already set.

--force
    Sets the "lfs" smudge and clean filters, overwriting existing values.

--local
    Sets the "lfs" smudge and clean filters in the local repository's git
    config, instead of the global git config (~/.gitconfig).

--worktree
    Sets the "lfs" smudge and clean filters in the current working tree's git
    config, instead of the global git config (~/.gitconfig) or local
    repository's git config ($GIT_DIR/config). If multiple working trees are in
    use, the Git config extension worktreeConfig must be enabled to use this
    option. If only one working tree is in use, --worktree has the same effect
    as --local. This option is only available if the installed Git version is
    at least 2.20.0 and therefore supports the "worktreeConfig" extension.

--manual
    Print instructions for manually updating your hooks to include git-lfs
    functionality. Use this option if git lfs install fails because of existing
    hooks and you want to retain their functionality.

--system
    Sets the "lfs" smudge and clean filters in the system git config, e.g.
    /etc/gitconfig instead of the global git config (~/.gitconfig).

--skip-smudge
    Skips automatic downloading of objects on clone or pull. This requires a
    manual "git lfs pull" every time a new commit is checked out on your
    repository.

--skip-repo
    Skips setup of the local repo; use if you want to install the global lfs
    filters but not make changes to the current repo.

SEE ALSO

git-lfs-uninstall(1), git-worktree(1).

Part of the git-lfs(1) suite.
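One plausible combination of the flags above for repositories with very large
LFS content: install the filters only for the current repository, defer content
downloads, and pull objects on demand later.

```ShellSession
$ git lfs install --local --skip-smudge
$ git lfs pull   # fetch and checkout LFS content when actually needed
```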
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-lock.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-lock.1.html new file mode 100644 index 0000000000000000000000000000000000000000..3782b204e89656e8ed0b12cf664d6dcb1408f37a --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-lock.1.html @@ -0,0 +1,116 @@ + + + + + + git-lfs-lock(1) - Set a file as "locked" on the Git LFS server + + + + +
+ + + +
    +
  1. git-lfs-lock(1)
  2. +
  3. +
  4. git-lfs-lock(1)
  5. +
+ + + +

NAME

+

+ git-lfs-lock - Set a file as "locked" on the Git LFS server +

+

SYNOPSIS

+ +

git lfs lock options path

+ +

DESCRIPTION

+ +

Sets the given file path as "locked" against the Git LFS server, with the +intention of blocking attempts by other users to update the given path. Locking +a file requires the file to exist in the working copy.

+ +

Once locked, LFS will verify that Git pushes do not modify files locked by +other users. See the description of the lfs.<url>.locksverify config key in +git-lfs-config(5) for details.

+ +

OPTIONS

+ +
+
+-r name --remote=name +
+
Specify the Git LFS server to use. Ignored if the lfs.url config key is set.
+
--json
+
Writes lock info as JSON to STDOUT if the command exits successfully. Intended +for interoperation with external tools. If the command returns with a non-zero +exit code, plain text messages will be sent to STDERR.
+
+ +
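Illustrative invocations (the path and remote name are hypothetical; each line is an independent example):

```
$ git lfs lock images/banner.psd                     # lock a file in the working copy
$ git lfs lock --json images/banner.psd              # same, but emit the lock info as JSON
$ git lfs lock --remote=upstream images/banner.psd   # lock against a specific Git LFS server
```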

SEE ALSO

+ +

git-lfs-unlock(1), git-lfs-locks(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-lock(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-locks.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-locks.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ac8b000e116ad55868e5cb8af5daddff2d4a316c --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-locks.1.html @@ -0,0 +1,138 @@ + + + + + + git-lfs-locks(1) - Lists currently locked files from the Git LFS server. + + + + +
+ + + +
    +
  1. git-lfs-locks(1)
  2. +
  3. +
  4. git-lfs-locks(1)
  5. +
+ + + +

NAME

+

+ git-lfs-locks - Lists currently locked files from the Git LFS server. +

+

SYNOPSIS

+ +

git lfs locks options

+ +

DESCRIPTION

+ +

Lists current locks from the Git LFS server.

+ +

OPTIONS

+ +
+
+-r name --remote=name +
+
Specify the Git LFS server to use. Ignored if the lfs.url config key is set.
+
+-i <id> --id=<id> +
+
Specifies a lock by its ID. Returns a single result.
+
+-p <path> --path=<path> +
+
Specifies a lock by its path. Returns a single result.
+
--local
+
Lists only our own locks which are cached locally. Skips a remote call.
+
--cached
+
Lists cached locks from the last remote call. Contrary to --local, this will +include locks of other users as well. This option is intended to display the +last known locks in case you are offline. There is no guarantee that locks +on the server have not changed in the meanwhile.
+
--verify
+
Verifies the lock owner on the server and marks our own locks by 'O'. +Own locks are actually held by us and corresponding files can be updated for +the next push. All other locks are held by someone else. +Contrary to --local, this option will also detect locks which are held by us +despite no local lock information being available (e.g. because the file had +been locked from a different clone); +it will also detect 'broken' locks (e.g. if someone else has forcefully +unlocked our files).
+
+-l <num> --limit=<num> +
+
Specifies number of results to return.
+
--json
+
Writes lock info as JSON to STDOUT if the command exits successfully. Intended +for interoperation with external tools. If the command returns with a non-zero +exit code, plain text messages will be sent to STDERR.
+
+ +
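Some illustrative queries (the path and limit are hypothetical):

```
$ git lfs locks                            # list locks known to the server
$ git lfs locks --verify                   # also mark the locks we hold with 'O'
$ git lfs locks --path=images/banner.psd   # look up a single lock by path
$ git lfs locks --local --limit=10         # only our cached locks, at most ten results
```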

SEE ALSO

+ +

git-lfs-lock(1), git-lfs-unlock(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-locks(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-logs.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-logs.1.html new file mode 100644 index 0000000000000000000000000000000000000000..a2328604b550b70fb7694b00c78132a76f2014e9 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-logs.1.html @@ -0,0 +1,118 @@ + + + + + + git-lfs-logs(1) - Show errors from the git-lfs command + + + + +
+ + + +
    +
  1. git-lfs-logs(1)
  2. +
  3. +
  4. git-lfs-logs(1)
  5. +
+ + + +

NAME

+

+ git-lfs-logs - Show errors from the git-lfs command +

+

SYNOPSIS

+ +

git lfs logs
+git lfs logs file
+git lfs logs clear
+git lfs logs boomtown

+ +

DESCRIPTION

+ +

Display errors from the git-lfs command. Any time it crashes, the details are +saved to ".git/lfs/logs".

+ +

COMMANDS

+ +
+
clear
+
Clears all of the existing logged errors.
+
boomtown
+
Triggers a dummy exception.
+
+ +

OPTIONS

+ +

Without any options, git lfs logs simply shows the list of error logs.

+ +
+
file
+
Shows the specified error log. Use "last" to show the most recent error.
+
+ +
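For example:

```
$ git lfs logs         # list the stored error logs
$ git lfs logs last    # show the most recent error log
$ git lfs logs clear   # delete all stored error logs
```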

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-logs(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html new file mode 100644 index 0000000000000000000000000000000000000000..2f92cfe91f037a4ba709e0e98552ada70b6bb4ed --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-ls-files.1.html @@ -0,0 +1,141 @@ + + + + + + git-lfs-ls-files(1) - Show information about Git LFS files in the index and working tree + + + + +
+ + + +
    +
  1. git-lfs-ls-files(1)
  2. +
  3. +
  4. git-lfs-ls-files(1)
  5. +
+ + + +

NAME

+

+ git-lfs-ls-files - Show information about Git LFS files in the index and working tree +

+

SYNOPSIS

+ +

git lfs ls-files [ref]
+git lfs ls-files ref ref

+ +

DESCRIPTION

+ +

Display paths of Git LFS files that are found in the tree at the given +reference. If no reference is given, scan the currently checked-out branch. +If two references are given, the LFS files that are modified between the two +references are shown; deletions are not listed.

+ +

An asterisk (*) after the OID indicates a full object, a minus (-) indicates an +LFS pointer.

+ +
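For reference, output of the following shape is typical (the OIDs and paths here are invented for illustration; '*' marks a full object, '-' an LFS pointer whose content is not present locally):

```
$ git lfs ls-files
0306c2f1dd * assets/texture.png
cafe12ab34 - assets/video/intro.mp4
```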

OPTIONS

+ +
+
+-l --long +
+
Show the entire 64-character OID, instead of just the first 10.&#x000A;
+
+-s --size +
+
Show the size of the LFS object in parentheses at the end of a line.&#x000A;
+
+-d --debug +
+
Show as much information as possible about an LFS file. This is intended +for manual inspection; the exact format may change at any time.&#x000A;
+
+-a --all +
+
Inspects the full history of the repository, not the current HEAD (or other +provided reference). This will include previous versions of LFS objects that +are no longer found in the current tree.
+
--deleted
+
Shows the full history of the given reference, including objects that have +been deleted.
+
+-I paths --include=paths +
+
Include paths matching only these patterns; see [FETCH SETTINGS].
+
+-X paths --exclude=paths +
+
Exclude paths matching any of these patterns; see [FETCH SETTINGS].
+
+-n --name-only +
+
Show only the lfs tracked file names.&#x000A;
 SEE ALSO&#x000A;
+
+ +

git-lfs-status(1), git-lfs-config(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-ls-files(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html new file mode 100644 index 0000000000000000000000000000000000000000..029bd4f5e8a01267c355c28d18588a667727d375 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-merge-driver.1.html @@ -0,0 +1,188 @@ + + + + + + git-lfs-merge-driver(1) - Merge text-based LFS files + + + + +
+ + + +
    +
  1. git-lfs-merge-driver(1)
  2. +
  3. +
  4. git-lfs-merge-driver(1)
  5. +
+ + + +

NAME

+

+ git-lfs-merge-driver - Merge text-based LFS files +

+

SYNOPSIS

+ +

git lfs merge-driver options

+ +

DESCRIPTION

+ +

Merge text files stored in Git LFS using the default Git merge machinery, or a +custom merge driver if specified. Note that this, in general, does not support +partial renames or copies because Git does not support them in this case.

+ +

This program is intended to be invoked automatically by Git and not by users +manually. See CONFIGURATION for details on the configuration required for +that.

+ +

OPTIONS

+ +
    +
  • +

    --ancestor path +Specify the file containing the ancestor revision.

    +
  • +
  • +

    --current path +Specify the file containing the current revision.

    +
  • +
  • +

    --marker-size num +Specify the conflict marker size as an integer.

    +
  • +
  • +

    --other path +Specify the file containing the other revision.

    +
  • +
  • +

    --program program +Specify a command, which is passed to the shell after substitution, that +performs the actual merge. If this is not specified, git merge-file is +invoked with appropriate arguments to perform the merge of the file.

    + +

    See CONFIGURATION for the sequences which are substituted here.

    +
  • +
+ +

CONFIGURATION

+ +

Git allows the use of a custom merge driver for files based on the merge +attribute set in .gitattributes. By default, when using git lfs track, this +value is set to lfs.

+ +

Because Git LFS can be used to store both text and binary files and it isn't +always clear which behavior should be used, Git LFS does not enable this merge +driver by default. However, if you know that some or all of your files are text +files, then you can set the merge attribute for those files to lfs-text and +use git config to set the merge driver like so:

+ +

console +$ git config merge.lfs-text.driver 'git lfs merge-driver --ancestor %O --current %A --other %B --marker-size %L --output %A' +

+ +

This tells Git to invoke the custom Git LFS merge driver, which in turn uses +Git's merge machinery, to merge files where the merge attribute is set to +lfs-text. Note that lfs-text here is an example and any syntactically valid +value can be used.

+ +
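For example, a .gitattributes entry that routes a text-based file type through this driver might look like the following; the *.tex pattern is purely illustrative, and the lfs-text value must match the merge.&lt;name&gt;.driver key configured above:

```
*.tex filter=lfs diff=lfs merge=lfs-text -text
```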

If you are using a special type of file that needs rules different from Git's +standard merge machinery, you can also specify the --program option, which +is passed to sh after substituting its own percent-encoded escapes:

+ +
    +
  • +%A: the current version
  • +
  • +%B: the other version
  • +
  • +%D: the destination version
  • +
  • +%O: the ancestor version
  • +
  • +%L: the conflict marker size
  • +
+ +

Note that the percent sign must typically be doubled to prevent Git from +substituting its own values here. Therefore, specifying the default behavior +explicitly looks like this:

+ +

console +$ git config merge.lfs-text.driver \ + 'git lfs merge-driver --ancestor %O --current %A --other %B --marker-size %L --output %A --program '\''git merge-file --stdout --marker-size=%%L %%A %%O %%B >%%D'\''' +

+ +

The exit status from the custom command should be zero on success or non-zero on +conflicts or other failure.

+ +

Note that if no merge driver is specified for the value of the merge attribute +(as is the case by default with merge=lfs), then the default Git merge +strategy is used. For LFS files, this means that Git will try to merge the +pointer files, which usually is not useful.

+ +

SEE ALSO

+ +

git-merge(1), git-merge-file(1), gitattributes(5)

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-merge-driver(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e2eb84f41d9edbb8fe0afb6c20277d7ec7ae2964 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-migrate.1.html @@ -0,0 +1,590 @@ + + + + + + git-lfs-migrate(1) - Migrate history to or from Git LFS + + + + +
+ + + +
    +
  1. git-lfs-migrate(1)
  2. +
  3. +
  4. git-lfs-migrate(1)
  5. +
+ + + +

NAME

+

+ git-lfs-migrate - Migrate history to or from Git LFS +

+

SYNOPSIS

+ +

git lfs migrate mode [options] [--] [branch ...]

+ +

DESCRIPTION

+ +

Convert files in a Git repository to or from Git LFS pointers, or +summarize Git file sizes by file type. The import mode converts Git +files (i.e., blobs) to Git LFS, while the export mode does the reverse, +and the info mode provides an informational summary which may be useful +in deciding which files to import or export.

+ +

In all modes, by default git lfs migrate operates only on the currently +checked-out branch, and only on files (of any size and type) added in +commits which do not exist on any remote. Multiple options are available +to override these defaults.

+ +

When converting files to or from Git LFS, the git lfs migrate command will +only make changes to your local repository and working copy, never any +remotes. This is intentional as the import and export modes are +generally "destructive" in the sense that they rewrite your Git history, +changing commits and generating new commit SHAs. (The exception is the +"no-rewrite" import sub-mode; see IMPORT WITHOUT REWRITING HISTORY +for details.)

+ +

You should therefore always first commit or stash any uncommitted work +before using the import or export modes, and then validate the result of +the migration before pushing the changes to your remotes, for instance by +running the info mode and by examining your rewritten commit history.

+ +

Once you are satisfied with the changes, you will need to force-push the +new Git history of any rewritten branches to all your remotes. This is +a step which should be taken with care, since you will be altering the +Git history on your remotes.

+ +

To examine or modify files in branches other than the currently checked-out +one, branch refs may be specified directly, or provided in one or more +--include-ref options. They may also be excluded by prefixing them +with ^ or providing them in --exclude-ref options. Use the --everything +option to specify that all refs should be examined, including all remote refs. +See INCLUDE AND EXCLUDE REFERENCES for details.

+ +

For the info and import modes, all file types are considered by default; +while useful in the info mode, this is often not desirable when importing, +so either filename patterns (pathspecs) or the --fixup option should +normally be specified in that case. (At least one include pathspec is +required for the export mode.) Pathspecs may be defined using the +--include and --exclude options (-I and -X for short), as described +in INCLUDE AND EXCLUDE.

+ +

As typical Git LFS usage depends on tracking specific file types using +filename patterns defined in .gitattributes files, the git lfs migrate +command will examine, create, and modify .gitattributes files as +necessary. The .gitattributes files will always be assigned the default +read/write permissions mode (i.e., without execute permissions). Any +symbolic links with that name will cause the migration to halt prematurely.

+ +

The import mode (see IMPORT) will convert Git objects of the file types +specified (e.g., with --include) to Git LFS pointers, and will add entries +for those file types to .gitattributes files, creating those files if they +do not exist. The result should be as if git lfs track commands had been +run at the points in your Git history corresponding to where each type of +converted file first appears. The exception is if the --fixup option is +given, in which case the import mode will only examine any existing +.gitattributes files and then convert Git objects which should be tracked +by Git LFS but are not yet.

+ +

The export mode (see EXPORT) works as the reverse operation to the +import mode, converting any Git LFS pointers that match the file types +specified with --include, which must be given at least once. Note that +.gitattributes entries will not be removed, nor will the files; instead, +the export mode inserts "do not track" entries similar to those created +by the git lfs untrack command. The --remote option is available in +the export mode to specify the remote from which Git LFS objects should +be fetched if they do not exist in the local Git LFS object cache; if not +provided, origin is used by default.

+ +

The info mode (see INFO) summarizes by file type (i.e., by filename +extension) the total number and size of files in a repository. Note that +like the other two modes, by default the info mode operates only on +the currently checked-out branch and only on commits which do not exist on +any remote, so to get a summary of the entire repository across all branches, +use the --everything option. If objects have already been converted to +Git LFS pointers, then by default the size of the referenced objects is +totaled and reported separately. You may also choose to ignore them by +using --pointers=ignore or to treat the pointers as files by using +--pointers=no-follow. (The latter option is akin to how existing Git LFS +pointers were handled by the info mode in prior versions of Git LFS).

+ +

When using the --everything option, take note that it means all refs +(local and remote) will be considered, but not necessarily all file types. +The import and info modes consider all file types by default, although +the --include and --exclude options constrain this behavior. Also +note that after importing across all branches with the --everything option +(and then checking to ensure the results are satisfactory!) it may be +convenient to update multiple branches on your remotes by using the --all +option to git push.

+ +

Unless the --skip-fetch option is given, git lfs migrate always begins by +fetching updated lists of refs from all the remotes returned by git remote, +but as noted above, after making changes to your local Git history while +converting objects, it will never automatically push those changes to your +remotes.

+ +

MODES

+ +
    +
  • +

    info + Show information about repository size. See INFO.

    +
  • +
  • +

    import + Convert Git objects to Git LFS pointers. See IMPORT and + IMPORT WITHOUT REWRITING HISTORY.

    +
  • +
  • +

    export + Convert Git LFS pointers to Git objects. See EXPORT.

    +
  • +
+ +

OPTIONS

+ +
+
+-I paths --include=paths +
+
See INCLUDE AND EXCLUDE.
+
+-X paths --exclude=paths +
+
See INCLUDE AND EXCLUDE.
+
+--include-ref=refname +
+
See INCLUDE AND EXCLUDE REFERENCES.
+
+--exclude-ref=refname +
+
See INCLUDE AND EXCLUDE REFERENCES.
+
--skip-fetch
+
Assumes that the known set of remote references is complete, and should not + be refreshed when determining the set of "un-pushed" commits to migrate. Has + no effect when combined with --include-ref or --exclude-ref.
+
--everything
+
See INCLUDE AND EXCLUDE REFERENCES. + +

Note: Git refs are "case-sensitive" on all platforms in "packed from" (see + git-pack-refs(1)). On "case-insensitive" file systems, e.g. NTFS on Windows + or default APFS on macOS, git-lfs-migrate(1) would only migrate the first + ref if two or more refs are equal except for upper/lower case letters.

+
+
--yes
+
Assume a yes answer to any prompts, permitting noninteractive use. + Currently, the only such prompt is the one asking whether to overwrite + (destroy) any working copy changes. Thus, specifying this option may cause + data loss if you are not careful.
+
[branch ...]
+
Migrate only the set of branches listed. If not given, git-lfs-migrate(1) + will migrate the currently checked out branch. + +

References beginning with ^ will be excluded, whereas branches that do not + begin with ^ will be included.

+ +

If any of --include-ref or --exclude-ref are given, the checked out + branch will not be appended, but branches given explicitly will be appended.

+
+
+ +

INFO

+ +

The info mode summarizes the sizes of file objects present in the Git history. +It supports all the core migrate options and these additional ones:

+ +
    +
  • +

    --above=<size> + Only count files whose individual filesize is above the given size. size + may be specified as a number of bytes, or a number followed by a storage + unit, e.g., "1b", "20 MB", "3 TiB", etc.

    + +

If a set of files sharing a common extension has no files in that set whose + individual size is above the given --above threshold, no entry for that set + will be shown.&#x000A;

    +
  • +
  • +

    --top=<n> + Only display the top n entries, ordered by how many total files match the + given pathspec. The default is to show only the top 5 entries. When + existing Git LFS objects are found, an extra, separate "LFS Objects" line + is output in addition to the top n entries, unless the --pointers + option is used to change this behavior.

    +
  • +
  • +

    --unit=<unit> + Format the number of bytes in each entry as a quantity of the storage unit + provided. Valid units include: + * b, kib, mib, gib, tib, pib - for IEC storage units + * b, kb, mb, gb, tb, pb - for SI storage units

    + +

    If a --unit is not specified, the largest unit that can fit the number of + counted bytes as a whole number quantity is chosen.

    +
  • +
  • +

    --pointers=[follow|no-follow|ignore] + Treat existing Git LFS pointers in the history according to one of three + alternatives. In the default follow case, if any pointers are found, + an additional separate "LFS Objects" line item is output which summarizes + the total number and size of the Git LFS objects referenced by pointers. + In the ignore case, any pointers are simply ignored, while the no-follow + case replicates the behavior of the info mode in older Git LFS versions + and treats any pointers it finds as if they were regular files, so the + output totals only include the contents of the pointers, not the contents + of the objects to which they refer.

    +
  • +
  • +

    --fixup + Infer --include and --exclude filters on a per-commit basis based on the + .gitattributes files in a repository. In practice, this option counts any + filepaths which should be tracked by Git LFS according to the repository's + .gitattributes file(s), but aren't already pointers. The .gitattributes + files are not reported, in contrast to the normal output of the info mode. + This option is incompatible with explicitly given --include, --exclude + filters and with any --pointers setting other than ignore, hence + --fixup implies --pointers=ignore if it is not explicitly set.

    +
  • +
+ +

The format of the output shows the filename pattern, the total size of the +file objects (excluding those below the --above threshold, if one was +defined), and the ratio of the number of files above the threshold to the +total number of files; this ratio is also shown as a percentage. For example:

+ +

+*.gif 93 MB 9480/10504 files(s) 90% +*.png 14 MB 1732/1877 files(s) 92% +

+ +

By default only the top five entries are shown, but --top allows for +more or fewer to be output as desired.

+ +

IMPORT

+ +

The import mode migrates objects present in the Git history to pointer +files tracked and stored with Git LFS. It supports all the core migrate +options and these additional ones:

+ +
    +
  • +

    --verbose + Print the commit oid and filename of migrated files to STDOUT.

    +
  • +
  • +

    --above=<size> + Only migrate files whose individual filesize is above the given size. size + may be specified as a number of bytes, or a number followed by a storage + unit, e.g., "1b", "20 MB", "3 TiB", etc. This option cannot be used with + the --include, --exclude, and --fixup options.

    +
  • +
  • +

--object-map=&lt;path&gt; + Write to path a file with the mapping of each rewritten commit. The file + format is CSV with this pattern: OLD-SHA,NEW-SHA&#x000A;

    +
  • +
  • +

    --no-rewrite + Migrate objects to Git LFS in a new commit without rewriting Git + history. Please note that when this option is used, the migrate import + command will expect a different argument list, specialized options will + become available, and the core migrate options will be ignored. See + IMPORT WITHOUT REWRITING HISTORY.

    +
  • +
  • +

    --fixup + Infer --include and --exclude filters on a per-commit basis based on the + .gitattributes files in a repository. In practice, this option imports any + filepaths which should be tracked by Git LFS according to the repository's + .gitattributes file(s), but aren't already pointers. This option is + incompatible with explicitly given --include, --exclude filters.

    +
  • +
+ +

If --no-rewrite is not provided and --include or --exclude (-I, -X, +respectively) are given, the .gitattributes will be modified to include any +new filepath patterns as given by those flags.

+ +

If --no-rewrite is not provided and neither of those flags are given, the +gitattributes will be incrementally modified to include new filepath extensions +as they are rewritten in history.

+ +

IMPORT WITHOUT REWRITING HISTORY

+ +

The import mode has a special sub-mode enabled by the --no-rewrite flag. +This sub-mode will migrate objects to pointers as in the base import +mode, but will do so in a new commit without rewriting Git history. When using +this sub-mode, the base migrate options, such as --include-ref, will be +ignored, as will those for the base import mode. The migrate command will +also take a different argument list. As a result of these changes, +--no-rewrite will only operate on the current branch - any other interested +branches must have the generated commit merged in.

+ +

The --no-rewrite sub-mode supports the following options and arguments:

+ +
    +
  • +

    -m <message> --message=<message> + Specifies a commit message for the newly created commit.

    +
  • +
  • +

    [file ...] + The list of files to import. These files must be tracked by patterns + specified in the gitattributes.

    +
  • +
+ +

If --message is given, the new commit will be created with the provided +message. If no message is given, a commit message will be generated based on the +file arguments.

+ +

EXPORT

+ +

The export mode migrates Git LFS pointer files present in the Git history out +of Git LFS, converting them into their corresponding object files. It supports +all the core migrate options and these additional ones:

+ +
    +
  • +

    --verbose + Print the commit oid and filename of migrated files to STDOUT.

    +
  • +
  • +

    --object-map=<path> + Write to path a file with the mapping of each rewritten commit. The file + format is CSV with this pattern: OLD-SHA,NEW-SHA

    +
  • +
  • +

    --remote=<git-remote> + Download LFS objects from the provided git-remote during the export. If + not provided, defaults to origin.

    +
  • +
+ +

The export mode requires at minimum a pattern provided with the --include +argument to specify which files to export. Files matching the --include +patterns will be removed from Git LFS, while files matching the --exclude +patterns will retain their Git LFS status. The export command will modify the +.gitattributes to set/unset any filepath patterns as given by those flags.

+ +

INCLUDE AND EXCLUDE

+ +

You can specify that git lfs migrate should only convert files whose +pathspec matches the --include glob patterns and does not match the +--exclude glob patterns, either to reduce total migration time or to only +migrate part of your repo. Multiple patterns may be given using commas +as delimiters.

+ +

Pattern matching is done so as to be functionally equivalent to the pattern +matching format of .gitattributes. In addition to simple file extension +matches (e.g., *.gif) patterns may also specify directory paths, in which +case the path/** format may be used to match recursively.

+ +

Note that this form of pattern matching for the --include and --exclude +options used by the git lfs migrate command is unique among the suite of +git lfs commands. Other commands which also take these options, such as +git lfs ls-files, use the gitignore(5) form of pattern matching instead.

+ +
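As an illustration with hypothetical patterns, the following limits a rewrite to PSD files and one media directory while sparing its thumbnails:

```
$ git lfs migrate import \
    --include="*.psd,assets/video/**" \
    --exclude="assets/video/thumbs/**"
```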

INCLUDE AND EXCLUDE REFERENCES

+ +

You can specify that git lfs migrate should only convert files added +in commits reachable from certain references, namely those defined using one +or more --include-ref options, and should ignore files in commits reachable +from references defined in --exclude-ref options.

+ +

+ D---E---F + / \ + A---B------C refs/heads/my-feature + \ \ + \ refs/heads/main + \ + refs/remotes/origin/main +

+ +

In the above configuration, the following commits are reachable by each ref:

+ +

+refs/heads/main: C, B, A +refs/heads/my-feature: F, E, D, B, A +refs/remote/origin/main: A +

+ +

The following git lfs migrate options would, therefore, include commits +F, E, D, C, and B, but exclude commit A:

+ +

+ --include-ref=refs/heads/my-feature + --include-ref=refs/heads/main + --exclude-ref=refs/remotes/origin/main +

+ +

The presence of flag --everything indicates that all local and remote +references should be migrated.

+ +

EXAMPLES

+ +

Migrate unpushed commits

+ +

A common use case for the migrate command is to convert large Git objects to +LFS before pushing your commits. By default, it only scans commits that don't +exist on any remote, so long as the repository is non-bare.

+ +

First, run git lfs migrate info to list the file types taking up the most +space in your repository:

+ +

+$ git lfs migrate info +migrate: Fetching remote refs: ..., done +migrate: Sorting commits: ..., done +migrate: Examining commits: 100% (1/1), done +*.mp3 284 MB 1/1 files(s) 100% +*.pdf 42 MB 8/8 files(s) 100% +*.psd 9.8 MB 15/15 files(s) 100% +*.ipynb 6.9 MB 6/6 files(s) 100% +*.csv 5.8 MB 2/2 files(s) 100% +

+ +

Now, you can run git lfs migrate import to convert some file types to LFS:

+ +

+$ git lfs migrate import --include="*.mp3,*.psd" +migrate: Fetching remote refs: ..., done +migrate: Sorting commits: ..., done +migrate: Rewriting commits: 100% (1/1), done + main d2b959babd099fe70da1c1512e2475e8a24de163 -> 136e706bf1ae79643915c134e17a6c933fd53c61 +migrate: Updating refs: ..., done +

+ +

If after conversion you find that some files in your working directory have +been replaced with Git LFS pointers, this is normal, and the working copies +of these files can be repopulated with their full expected contents by using +git lfs checkout.

+ +

Migrate local history

+ +

You can also migrate the entire history of your repository:

+ +

``` +# Check for large files and existing Git LFS objects in your local main branch +$ git lfs migrate info --include-ref=main

+ +

# Check for large files and existing Git LFS objects in every branch&#x000A;

+

$ git lfs migrate info --everything

+ +

# Check for large files in every branch, ignoring any existing Git LFS objects,&#x000A;

+

# and listing the top 100 or fewer results +$ git lfs migrate info --everything --pointers=ignore --top=100 +```

+ +

The same flags will work in import mode:

+ +

``` +# Convert all zip files in your main branch +$ git lfs migrate import --include-ref=main --include="*.zip"

+ +

# Convert all zip files in every local branch&#x000A;

+

$ git lfs migrate import --everything --include="*.zip"

+ +

# Convert all files over 100K in every local branch&#x000A;

+

$ git lfs migrate import --everything --above=100Kb +```

+ +

Note: This will require a force-push to any existing Git remotes. Using +the --all option when force-pushing may be convenient if many refs were +updated, e.g., after importing to Git LFS with the --everything option.

+ +

Migrate without rewriting local history

+ +

You can also migrate files without modifying the existing history of your +repository. Note that in the examples below, files in subdirectories are not +included because they are not explicitly specified.

+ +

Without a specified commit message:

+ +

+$ git lfs migrate import --no-rewrite test.zip *.mp3 *.psd +

+ +

With a specified commit message:

+ +

+$ git lfs migrate import --no-rewrite \ + -m "Import test.zip, .mp3, .psd files in root of repo" \ + test.zip *.mp3 *.psd +

+ +

SEE ALSO

+ +

git-lfs-checkout(1), git-lfs-ls-files(1), git-lfs-track(1), +git-lfs-untrack(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-migrate(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html new file mode 100644 index 0000000000000000000000000000000000000000..276503496572ab14db7fa78b739928c85f9c4af6 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pointer.1.html @@ -0,0 +1,125 @@ + + + + + + git-lfs-pointer(1) - Build, compare, and check pointers + + + + +
+ + + +
    +
  1. git-lfs-pointer(1)
  2. +
  3. +
  4. git-lfs-pointer(1)
  5. +
+ + + +

NAME

+

+ git-lfs-pointer - Build, compare, and check pointers +

+

SYNOPSIS

+ +

git lfs pointer --file=path/to/file
+git lfs pointer --file=path/to/file --pointer=path/to/pointer
+git lfs pointer --file=path/to/file --stdin +git lfs pointer --check --file=path/to/file

+ +

Description

+ +

Builds and optionally compares generated pointer files to ensure consistency +between different Git LFS implementations.

+ +

OPTIONS

+ +
+
--file
+
A local file to build the pointer from.
+
--pointer
+
A local file including the contents of a pointer generated from another + implementation. This is compared to the pointer generated from --file.
+
--stdin
+
Reads the pointer from STDIN to compare with the pointer generated from + --file.
+
--check
+
Reads the pointer from STDIN (if --stdin is given) or the filepath (if + --file) is given. If neither or both of --stdin and --file are given, + the invocation is invalid. Exits 0 if the data read is a valid Git LFS + pointer. Exits 1 otherwise.
+
+--strict:
+
+
--no-strict
+
In conjunction with --check, --strict verifies that the pointer is + canonical; that is, it would be the one created by Git LFS. If it is not, + exits 2. The default, for backwards compatibility, is --no-strict, but + this may change in a future version.
+
+ +
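A few illustrative invocations (the file names are hypothetical):

```
$ git lfs pointer --file=assets/model.bin                       # print the pointer Git LFS would generate
$ git lfs pointer --file=assets/model.bin --pointer=other.ptr   # compare against a pointer built elsewhere
$ git lfs pointer --check --file=maybe-a-pointer.txt            # exit 0 only if the file is a valid pointer
```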

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-pointer(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c2a85cfa8a2adfc8c735529afbf1908324071984 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-checkout.1.html @@ -0,0 +1,98 @@ + + + + + + git-lfs-post-checkout(1) - Git post-checkout hook implementation + + + + +
+ + + +
    +
  1. git-lfs-post-checkout(1)
  2. +
  3. +
  4. git-lfs-post-checkout(1)
  5. +
+ + + +

NAME

+

+ git-lfs-post-checkout - Git post-checkout hook implementation +

+

SYNOPSIS

+ +

git lfs post-checkout rev_before ref_after is_branch_checkout

+ +

DESCRIPTION

+ +

Responds to Git post-checkout events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

SEE ALSO

+ +

git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-post-checkout(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html new file mode 100644 index 0000000000000000000000000000000000000000..12cace66629a086d3c969df4660ec9646ccbd70e --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-commit.1.html @@ -0,0 +1,103 @@ + + + + + + git-lfs-post-commit(1) - Git post-commit hook implementation + + + + +
+ + + +
    +
  1. git-lfs-post-commit(1)
  2. +
  3. +
  4. git-lfs-post-commit(1)
  5. +
+ + + +

NAME

+

+ git-lfs-post-commit - Git post-commit hook implementation +

+

SYNOPSIS

+ +

git lfs post-commit

+ +

DESCRIPTION

+ +

Responds to Git post-commit events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

Where the git lfs post-merge command, which has a similar purpose, must +examine all files in the working copy, git lfs post-commit can limit +itself to checking only those files which have changed in HEAD. It primarily +handles newly added lockable files which have not yet been made read-only.&#x000A;

+ +

SEE ALSO

+ +

git-lfs-post-merge(1), git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-post-commit(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html new file mode 100644 index 0000000000000000000000000000000000000000..0b8c8f4d74fb25f3cf338347850221d18b930b36 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-post-merge.1.html @@ -0,0 +1,98 @@ + + + + + + git-lfs-post-merge(1) - Git post-merge hook implementation + + + + +
+ + + +
    +
  1. git-lfs-post-merge(1)
  2. +
  3. +
  4. git-lfs-post-merge(1)
  5. +
+ + + +

NAME

+

+ git-lfs-post-merge - Git post-merge hook implementation +

+

SYNOPSIS

+ +

git lfs post-merge is_squash

+ +

DESCRIPTION

+ +

Responds to Git post-merge events. It makes sure that any files which are +marked as lockable by git lfs track are read-only in the working copy, if +not currently locked by the local user.

+ +

SEE ALSO

+ +

git-lfs-track(1)

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-post-merge(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html new file mode 100644 index 0000000000000000000000000000000000000000..b2ef773add8efbadaf1177864dcbf31c701ef53a --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pre-push.1.html @@ -0,0 +1,119 @@ + + + + + + git-lfs-pre-push(1) - Git pre-push hook implementation + + + + +
+ + + +
    +
  1. git-lfs-pre-push(1)
  2. +
  3. +
  4. git-lfs-pre-push(1)
  5. +
+ + + +

NAME

+

+ git-lfs-pre-push - Git pre-push hook implementation +

+

SYNOPSIS

+ +

git lfs pre-push remote [remoteurl]

+ +

DESCRIPTION

+ +

Responds to Git pre-push hook events. It reads the range of commits from STDIN, in +the following format:&#x000A;

+ +
<local-ref> SP <local-sha1> SP <remote-ref> SP <remote-sha1> \n
+
+ +

It also takes the remote name and URL as arguments.

+ +

If any of those Git objects are associated with Git LFS objects, those +objects will be pushed to the Git LFS API.

+ +

In the case of pushing a new branch, the list of Git objects will be all of +the Git objects in this branch.

+ +

In the case of deleting a branch, no attempts to push Git LFS objects will be +made.

+ +

OPTIONS

+ +
+
GIT_LFS_SKIP_PUSH
+
Do nothing on pre-push. For more, see: git-lfs-config(5).
+
+ +

SEE ALSO

+ +

git-lfs-clean(1), git-lfs-push(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-pre-push(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-prune.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-prune.1.html new file mode 100644 index 0000000000000000000000000000000000000000..d215726dfa4a7eef61ea0e2bab40ff97bf8fcbe3 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-prune.1.html @@ -0,0 +1,245 @@ + + + + + + git-lfs-prune(1) - Delete old LFS files from local storage + + + + +
+ + + +
    +
  1. git-lfs-prune(1)
  2. +
  3. +
  4. git-lfs-prune(1)
  5. +
+ + + +

NAME

+

+ git-lfs-prune - Delete old LFS files from local storage +

+

SYNOPSIS

+ +

git lfs prune options

+ +

DESCRIPTION

+ +

Deletes local copies of LFS files which are old, thus freeing up disk space. +Prune operates by enumerating all the locally stored objects, and then deleting +any which are not referenced by at least ONE of the following:

+ +
    +
  • the current checkout
  • +
  • all existing stashes
  • +
  • a 'recent branch'; see RECENT FILES +
  • +
  • a 'recent commit' on the current branch or recent branches; see RECENT FILES +
  • +
  • a commit which has not been pushed; see UNPUSHED LFS FILES +
  • +
  • any other worktree checkouts; see git-worktree(1) +
  • +
+ +

In general terms, prune will delete files you're not currently using and which +are not 'recent', so long as they've been pushed i.e. the local copy is not the +only one.

+ +

The reflog is not considered, only commits. Therefore LFS objects that are +only referenced by orphaned commits are always deleted.

+ +

Note: you should not run git lfs prune if you have different repositories +sharing the same custom storage directory; see git-lfs-config(5) for more +details about lfs.storage option.

+ +

In your Git configuration or in a .lfsconfig file, you may set +lfs.fetchexclude to a comma-separated list of paths. If lfs.fetchexclude +is defined, then any Git LFS files whose paths match one in that list will +be pruned unless they are referenced by a stash or an unpushed commit. +Paths are matched using wildcard matching as per gitignore(5).

+ +
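For instance, with hypothetical paths, the following stops retaining large reference media locally and then previews what a prune would remove:

```
$ git config lfs.fetchexclude "media/reference/**,datasets/raw/**"
$ git lfs prune --dry-run --verbose
```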

OPTIONS

+ +
    +
  • +

    --dry-run -d +Don't actually delete anything, just report on what would have been done

    +
  • +
  • +

    --force -f +Prune all objects except unpushed objects, including objects required for +currently checked out refs. Implies --recent.

    +
  • +
  • +

    --recent +Prune even objects that would normally be preserved by the configuration +options specified below in RECENT FILES.

    +
  • +
  • +

    --verify-remote -c +Contact the remote and check that copies of the files we would delete +definitely exist before deleting. See VERIFY REMOTE.

    +
  • +
  • +

    --no-verify-remote +Disables remote verification if lfs.pruneverifyremotealways was enabled in +settings. See VERIFY REMOTE.

    +
  • +
  • +

    --verbose -v +Report the full detail of what is/would be deleted.

    +
  • +
+ +

RECENT FILES

+ +

Prune won't delete LFS files referenced by 'recent' commits, in case you want +to use them again without having to download. The definition of 'recent' is +derived from the one used by git-lfs-fetch(1) to download recent objects with +the --recent option, with an offset of a number of days (default 3) to ensure +that we always keep files you download for a few days.

+ +

Here are the git-config(1) settings that control this behaviour:

+ +
    +
  • +

    lfs.pruneoffsetdays
    +The number of extra days added to the fetch recent settings when using them +to decide when to prune. So for a reference to be considered old enough to +prune, it has to be this many days older than the oldest reference that would +be downloaded via git lfs fetch --recent. Only used if the relevant +fetch recent 'days' setting is non-zero. Default 3 days.

    +
  • +
  • +

    lfs.fetchrecentrefsdays
    +lfs.fetchrecentremoterefs
    +lfs.fetchrecentcommitsdays
    +These have the same meaning as git-lfs-fetch(1) with the --recent option, +they are used as a base for the offset above. Anything which falls outside +of this offsetted window is considered old enough to prune. If a day value is +zero, that condition is not used at all to retain objects and they will be +pruned.

    +
  • +
+ +

UNPUSHED LFS FILES

+ +

When the only copy of an LFS file is local, and it is still reachable from any +reference, that file can never be pruned, regardless of how old it is.

+ +

To determine whether an LFS file has been pushed, we check the difference +between local refs and remote refs; where the local ref is ahead, any LFS files +referenced in those commits are unpushed and will not be deleted. This works +because the LFS pre-push hook always ensures that LFS files are pushed before +the remote branch is updated.&#x000A;

+ +

See DEFAULT REMOTE, for which remote is considered 'pushed' for pruning +purposes.

+ +

VERIFY REMOTE

+ +

The --verify-remote option calls the remote to ensure that any LFS files to be +deleted have copies on the remote before actually deleting them.

+ +

Usually the check performed by UNPUSHED LFS FILES is enough to determine that +files have been pushed, but if you want to be extra sure at the expense of extra +overhead you can make prune actually call the remote API and verify the +presence of the files you're about to delete locally. See DEFAULT REMOTE for +which remote is checked.

+ +

You can make this behaviour the default by setting lfs.pruneverifyremotealways +to true.

+ +

In addition to the overhead of calling the remote, using this option also +requires prune to distinguish between totally unreachable files (e.g. those that +were added to the index but never committed, or referenced only by orphaned +commits), and files which are still referenced, but by commits which are +prunable. This makes the prune process take longer.

+ +

DEFAULT REMOTE

+ +

When identifying UNPUSHED LFS FILES and performing VERIFY REMOTE, a single +remote, 'origin', is normally used as the reference. This one remote is +considered canonical; even if you use multiple remotes, you probably want to +retain your local copies until they've made it to that remote. 'origin' is used +by default because that will usually be a main central repo, or your fork of +it - in both cases that's a valid remote backup of your work. If origin doesn't +exist then by default nothing will be pruned because everything is treated as +'unpushed'.

+ +

You can alter the remote via git config: lfs.pruneremotetocheck. Set this +to a different remote name to check that one instead of 'origin'.

+ +
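For example, if the canonical copies live on a remote named upstream rather than origin (the name is illustrative):

```
$ git config lfs.pruneremotetocheck upstream
$ git lfs prune --verify-remote
```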

SEE ALSO

+ +

git-lfs-fetch(1), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-prune(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-pull.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pull.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e591d2681225dfc952919aacb399bd99d7a254e2 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-pull.1.html @@ -0,0 +1,143 @@ + + + + + + git-lfs-pull(1) - Download all Git LFS files for current ref &amp; checkout + + + + +
+ + + +
    +
  1. git-lfs-pull(1)
  2. +
  3. +
  4. git-lfs-pull(1)
  5. +
+ + + +

NAME

+

+ git-lfs-pull - Download all Git LFS files for current ref & checkout +

+

SYNOPSIS

+ +

git lfs pull [options] [remote]

+ +

DESCRIPTION

+ +

Download Git LFS objects for the currently checked out ref, and update +the working copy with the downloaded content if required.

+ +

This is equivalent to running the following 2 commands:

+ +

git lfs fetch [options] [remote] +git lfs checkout

+ +

OPTIONS

+ +
+
+-I paths --include=paths +
+
Specify lfs.fetchinclude just for this invocation; see INCLUDE AND EXCLUDE +
+
+-X paths --exclude=paths +
+
Specify lfs.fetchexclude just for this invocation; see INCLUDE AND EXCLUDE +
+
+ +

INCLUDE AND EXCLUDE

+ +

You can configure Git LFS to only fetch objects to satisfy references in certain +paths of the repo, and/or to exclude certain paths of the repo, to reduce the +time you spend downloading things you do not use.

+ +

In your Git configuration or in a .lfsconfig file, you may set either or +both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of +paths. If lfs.fetchinclude is defined, Git LFS objects will only be fetched +if their path matches one in that list, and if lfs.fetchexclude is defined, +Git LFS objects will only be fetched if their path does not match one +in that list. Paths are matched using wildcard matching as per gitignore(5).

+ +

Note that using the command-line options -I and -X override the respective +configuration settings. Setting either option to an empty string clears the +value.

+ +
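An illustrative invocation with hypothetical paths:

```
# fetch and check out only the textures, skipping raw captures, for this run only
$ git lfs pull --include="assets/textures/**" --exclude="assets/raw/**"
```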

DEFAULT REMOTE

+ +

Without arguments, pull downloads from the default remote. The default remote is +the same as for git pull, i.e. based on the remote branch you're tracking +first, or origin otherwise.

+ +

SEE ALSO

+ +

git-lfs-fetch(1), git-lfs-checkout(1), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-pull(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-push.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-push.1.html new file mode 100644 index 0000000000000000000000000000000000000000..64b12c82534b118bb1e578836f077858b1808506 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-push.1.html @@ -0,0 +1,120 @@ + + + + + + git-lfs-push(1) - Push queued large files to the Git LFS endpoint + + + + +
+ + + +
    +
  1. git-lfs-push(1)
  2. +
  3. +
  4. git-lfs-push(1)
  5. +
+ + + +

NAME

+

+ git-lfs-push - Push queued large files to the Git LFS endpoint +

+

SYNOPSIS

+ +

git lfs push options remote [ref...]
+git lfs push remote [ref...]
+git lfs push --object-id remote [oid...]

+ +

DESCRIPTION

+ +

Upload Git LFS files to the configured endpoint for the current Git remote. By +default, it filters out objects that are already referenced by the local clone +of the remote.

+ +

OPTIONS

+ +
+
--dry-run
+
Print the files that would be pushed, without actually pushing them.
+
--all
+
This pushes all objects to the remote that are referenced by any commit + reachable from the refs provided as arguments. If no refs are provided, then + all local refs are pushed. Note that this behavior differs from that of + git-lfs-fetch(1) when its --all option is used; in that case, all refs + are fetched, including refs other than those under refs/heads and + refs/tags. If you are migrating a repository with these commands, make + sure to run git lfs push for any additional remote refs that contain + Git LFS objects not reachable from your local refs.
+
--object-id
+
This pushes only the object OIDs listed at the end of the command, separated + by spaces.
+
+ +
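A few illustrative invocations (the branch and tag names are hypothetical):

```
$ git lfs push --dry-run origin main   # list what would be uploaded for main
$ git lfs push origin main v1.0        # push objects reachable from a branch and a tag
$ git lfs push --all origin            # push every reachable object, e.g. when seeding a mirror
```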

SEE ALSO

+ +

git-lfs-fetch(1), git-lfs-pre-push(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-push(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html new file mode 100644 index 0000000000000000000000000000000000000000..e0c2a6754208c607dc247e82e6830c1811b2e10f --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-smudge.1.html @@ -0,0 +1,134 @@ + + + + + + git-lfs-smudge(1) - Git smudge filter that converts pointer in blobs to the actual content + + + + +
+ + + +
    +
  1. git-lfs-smudge(1)
  2. +
  3. +
  4. git-lfs-smudge(1)
  5. +
+ + + +

NAME

+

+ git-lfs-smudge - Git smudge filter that converts pointer in blobs to the actual content +

+

SYNOPSIS

+ +

git lfs smudge [path] +git lfs smudge --skip [path]

+ +

DESCRIPTION

+ +

Read a Git LFS pointer file from standard input and write the contents +of the corresponding large file to standard output. If needed, +download the file's contents from the Git LFS endpoint. The path +argument, if provided, is only used for a progress bar.

+ +

Smudge is typically run by Git's smudge filter, configured by the repository's +Git attributes.

+ +

In your Git configuration or in a .lfsconfig file, you may set either or +both of lfs.fetchinclude and lfs.fetchexclude to comma-separated lists of +paths. If lfs.fetchinclude is defined, Git LFS pointer files will only be +replaced with the contents of the corresponding Git LFS object file if their +path matches one in that list, and if lfs.fetchexclude is defined, Git LFS +pointer files will only be replaced with the contents of the corresponding +Git LFS object file if their path does not match one in that list. Paths are +matched using wildcard matching as per gitignore(5). Git LFS pointer files +that are not replaced with the contents of their corresponding object files +are simply copied to standard output without change.

+ +

OPTIONS

+ +

Without any options, git lfs smudge outputs the raw Git LFS content to +standard output.

+ +
+
--skip
+
Skip automatic downloading of objects on clone or pull.
+
GIT_LFS_SKIP_SMUDGE
+
Disables the smudging process. For more, see: git-lfs-config(5).
+
+ +
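Manual invocation is rarely needed, but as a sketch (the paths are hypothetical, and the pointer is assumed to have been saved to a separate file beforehand):

```
$ git lfs smudge docs/design.psd < docs/design.psd.pointer > docs/design.psd
```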

KNOWN BUGS

+ +

On Windows, Git before 2.34.0 does not handle files in the working tree larger +than 4 gigabytes. Newer versions of Git, as well as Unix versions, are +unaffected.

+ +

SEE ALSO

+ +

git-lfs-install(1), gitattributes(5), gitignore(5).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-smudge(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html new file mode 100644 index 0000000000000000000000000000000000000000..d5400f3ce78619f2d4ef1138883832cd43c96bc2 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-standalone-file.1.html @@ -0,0 +1,102 @@ + + + + + + git-lfs-standalone-file(1) - Standalone transfer adapter for file URLs + + + + +
+ + + +
    +
  1. git-lfs-standalone-file(1)
  2. +
  3. +
  4. git-lfs-standalone-file(1)
  5. +
+ + + +

NAME

+

+ git-lfs-standalone-file - Standalone transfer adapter for file URLs +

+

SYNOPSIS

+ +

git lfs standalone-file

+ +

DESCRIPTION

+ +

Provides a standalone transfer adapter for file URLs (local paths).

+ +

By default, Git LFS requires the support of an HTTP server to implement the Git +LFS protocol. However, this tool allows the use of URLs starting with file:/// +(that is, those representing local paths) in addition. Configuration is not +necessary; Git LFS handles this internally.

+ +

When invoked, this tool speaks JSON on input and output as a standalone transfer +adapter. It is not intended for use by end users.

+ +

SEE ALSO

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-standalone-file(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-status.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-status.1.html new file mode 100644 index 0000000000000000000000000000000000000000..c1673778b63008ac79534f299403daab64b32bc7 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-status.1.html @@ -0,0 +1,123 @@ + + + + + + git-lfs-status(1) - Show the status of Git LFS files in the working tree + + + + +
+ + + +
    +
  1. git-lfs-status(1)
  2. +
  3. +
  4. git-lfs-status(1)
  5. +
+ + + +

NAME

+

+ git-lfs-status - Show the status of Git LFS files in the working tree +

+

SYNOPSIS

+ +

git lfs status [options]

+ +

DESCRIPTION

+ +

Display paths of Git LFS objects that

+ +
    +
  • +

    have not been pushed to the Git LFS server. These are large files +that would be uploaded by git push.

    +
  • +
  • +

    have differences between the index file and the current HEAD commit. +These are large files that would be committed by git commit.

    +
  • +
  • +

    have differences between the working tree and the index file. These +are files that could be staged using git add.

    +
  • +
+ +

This command must be run in a non-bare repository.

+ +

OPTIONS

+ +
+
--porcelain
+
Give the output in an easy-to-parse format for scripts.
+
--json
+
Give the output in a stable json format for scripts.
+
+ +
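For example:

```
$ git lfs status              # human-readable summary
$ git lfs status --porcelain  # stable, script-friendly output
$ git lfs status --json       # the same information as JSON
```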

SEE ALSO

+ +

git-lfs-ls-files(1).

+ +

Part of the git-lfs(1) suite.

+ +
    +
  1. +
  2. May 2022
  3. +
  4. git-lfs-status(1)
  5. +
+ +
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-track.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-track.1.html new file mode 100644 index 0000000000000000000000000000000000000000..df3f9681ca8e4d8bf9a259a861e11d462b838a8f --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-track.1.html @@ -0,0 +1,179 @@ + + + + + + git-lfs-track(1) - View or add Git LFS paths to Git attributes + + + + +
+ + + +
    +
  1. git-lfs-track(1)
  2. +
  3. +
  4. git-lfs-track(1)
  5. +
+ + + +

NAME

+

+ git-lfs-track - View or add Git LFS paths to Git attributes +

+

SYNOPSIS

+ +

git lfs track [options] [pattern...]

+ +

DESCRIPTION

+ +

Start tracking the given pattern(s) through Git LFS. The pattern argument +is written to .gitattributes. If no paths are provided, simply list the +currently-tracked paths.&#x000A;

+ +

The gitattributes documentation states +that patterns use the gitignore pattern rules +to match paths. This means that patterns which contain asterisk (*), question +mark (?), and the bracket characters ([ and ]) are treated specially; to +disable this behavior and treat them literally instead, use --filename or +escape the character with a backslash.

+ +

OPTIONS

  • --verbose  -v:
    If enabled, have git lfs track log files which it will touch. Disabled by
    default.

  • --dry-run  -d:
    If enabled, have git lfs track log all actions it would normally take
    (adding entries to .gitattributes, touching files on disk, etc) without
    performing any mutative operations to the disk.

    git lfs track --dry-run [files] also implicitly mocks the behavior of
    passing the --verbose, and will log in greater detail what it is doing.

    Disabled by default.

  • --filename
    Treat the arguments as literal filenames, not as patterns. Any special
    glob characters in the filename will be escaped when writing the
    .gitattributes file.

  • --lockable  -l
    Make the paths 'lockable', meaning they should be locked to edit them, and
    will be made read-only in the working copy when not locked.

  • --not-lockable
    Remove the lockable flag from the paths so they are no longer read-only
    unless locked.

  • --no-excluded
    Do not list patterns that are excluded in the output; only list patterns
    that are tracked.

  • --no-modify-attrs
    Makes matched entries stat-dirty so that Git can re-index files you wish
    to convert to LFS. Does not modify any .gitattributes file(s).

EXAMPLES

List the patterns that Git LFS is currently tracking:

    git lfs track

Configure Git LFS to track GIF files:

    git lfs track "*.gif"

Configure Git LFS to track PSD files and make them read-only unless locked:

    git lfs track --lockable "*.psd"

Configure Git LFS to track the file named project [1].psd:

    git lfs track --filename "project [1].psd"

SEE ALSO

git-lfs-untrack(1), git-lfs-install(1), gitattributes(5), gitignore(5).

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html new file mode 100644 index 0000000000000000000000000000000000000000..24641d96d1a6e00e921427f05a4b2671f2221c7a --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-uninstall.1.html @@ -0,0 +1,126 @@ + + + + + + git-lfs-uninstall(1) - Remove Git LFS configuration + + + + +
NAME

git-lfs-uninstall - Remove Git LFS configuration

SYNOPSIS

git lfs uninstall

DESCRIPTION

Perform the following actions to remove the Git LFS configuration:

  • Remove the "lfs" clean and smudge filters from the global Git config.
  • Uninstall the Git LFS pre-push hook if run from inside a Git repository.

OPTIONS

--local
    Removes the "lfs" smudge and clean filters from the local repository's
    git config, instead of the global git config (~/.gitconfig).

--worktree
    Removes the "lfs" smudge and clean filters from the current working
    tree's git config, instead of the global git config (~/.gitconfig) or
    local repository's git config ($GIT_DIR/config). If multiple working
    trees are in use, the Git config extension worktreeConfig must be enabled
    to use this option. If only one working tree is in use, --worktree has
    the same effect as --local. This option is only available if the
    installed Git version is at least 2.20.0 and therefore supports the
    "worktreeConfig" extension.

--system
    Removes the "lfs" smudge and clean filters from the system git config,
    instead of the global git config (~/.gitconfig).

--skip-repo
    Skips cleanup of the local repo; use if you want to uninstall the global
    lfs filters but not make changes to the current repo.
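For illustration only (not part of the upstream page; both forms use options
documented above), removing the global configuration, or only the current
repository's, might look like:

    git lfs uninstall
    git lfs uninstall --local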

SEE ALSO

git-lfs-install(1), git-worktree(1).

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html new file mode 100644 index 0000000000000000000000000000000000000000..ea3dec5fc97227c9a208baf1ae97236a707fbcc6 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-unlock.1.html @@ -0,0 +1,120 @@ + + + + + + git-lfs-unlock(1) - Remove "locked" setting for a file on the Git LFS server + + + + +
NAME

git-lfs-unlock - Remove "locked" setting for a file on the Git LFS server

SYNOPSIS

git lfs unlock OPTIONS path

DESCRIPTION

Removes the given file path as "locked" on the Git LFS server. Files must
exist and have a clean git status before they can be unlocked. The --force
flag will skip these checks.

OPTIONS

-r name  --remote=name
    Specify the Git LFS server to use. Ignored if the lfs.url config key is
    set.

-f  --force
    Tells the server to remove the lock, even if it's owned by another user.

-i <id>  --id=<id>
    Specifies a lock by its ID instead of path.

--json
    Writes lock info as JSON to STDOUT if the command exits successfully.
    Intended for interoperation with external tools. If the command returns
    with a non-zero exit code, plain text messages will be sent to STDERR.
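As a hedged illustration (not from the upstream page; the path and lock ID
below are placeholders only), unlocking by path or by ID could look like:

    git lfs unlock images/banner.psd
    git lfs unlock --id=123 --force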

SEE ALSO

git-lfs-lock(1), git-lfs-locks(1).

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html new file mode 100644 index 0000000000000000000000000000000000000000..54ab07593f947a592123bdc15cca8b3f257d5019 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-untrack.1.html @@ -0,0 +1,108 @@ + + + + + + git-lfs-untrack(1) - Remove Git LFS paths from Git Attributes + + + + +
NAME

git-lfs-untrack - Remove Git LFS paths from Git Attributes

SYNOPSIS

git lfs untrack path...

DESCRIPTION

Stop tracking the given path(s) through Git LFS. The path argument can be a
glob pattern or a file path.

EXAMPLES

Configure Git LFS to stop tracking GIF files:

    git lfs untrack "*.gif"
SEE ALSO

git-lfs-track(1), git-lfs-install(1), gitattributes(5).

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs-update.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs-update.1.html new file mode 100644 index 0000000000000000000000000000000000000000..f66d524154bfb72b4da8b82ccc903952502b723a --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs-update.1.html @@ -0,0 +1,122 @@ + + + + + + git-lfs-update(1) - Update Git hooks + + + + +
NAME

git-lfs-update - Update Git hooks

SYNOPSIS

git lfs update [--manual | --force]

DESCRIPTION

Updates the Git hooks used by Git LFS. Silently upgrades known hook contents.
If you have your own custom hooks you may need to use one of the extended
options below.

OPTIONS

  • --manual  -m
    Print instructions for manually updating your hooks to include git-lfs
    functionality. Use this option if git lfs update fails because of
    existing hooks and you want to retain their functionality.

  • --force  -f
    Forcibly overwrite any existing hooks with git-lfs hooks. Use this option
    if git lfs update fails because of existing hooks but you don't care
    about their current contents.
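For illustration (not part of the upstream page; both flags are documented
above), a typical upgrade plus the two fallbacks might be invoked as:

    git lfs update
    git lfs update --manual
    git lfs update --force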

SEE ALSO

Part of the git-lfs(1) suite.
+ + diff --git a/out/merged/git-lfs-3.2.0/man/html/git-lfs.1.html b/out/merged/git-lfs-3.2.0/man/html/git-lfs.1.html new file mode 100644 index 0000000000000000000000000000000000000000..6e3a7f9fb16a3be861a19b18367699511049afae --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/html/git-lfs.1.html @@ -0,0 +1,220 @@ + + + + + + git-lfs(1) - Work with large files in Git repositories + + + + +
NAME

git-lfs - Work with large files in Git repositories

SYNOPSIS

git lfs command [args]

DESCRIPTION

Git LFS is a system for managing and versioning large files in association
with a Git repository. Instead of storing the large files within the Git
repository as blobs, Git LFS stores special "pointer files" in the
repository, while storing the actual file contents on a Git LFS server. The
contents of the large file are downloaded automatically when needed, for
example when a Git branch containing the large file is checked out.

Git LFS works by using a "smudge" filter to look up the large file contents
based on the pointer file, and a "clean" filter to create a new version of
the pointer file when the large file's contents change. It also uses a
pre-push hook to upload the large file contents to the Git LFS server
whenever a commit containing a new large file version is about to be pushed
to the corresponding Git server.
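As a rough illustration of the pointer-file concept described above (a sketch
added here, not text from the upstream page; the size value is made up and
the oid is only a placeholder), a Git LFS pointer stored in place of a large
file looks approximately like:

    version https://git-lfs.github.com/spec/v1
    oid sha256:<64-hex-digit object id>
    size 12345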

COMMANDS

Like Git, Git LFS commands are separated into high level ("porcelain")
commands and low level ("plumbing") commands.

High level porcelain commands

git-lfs-checkout(1)
    Populate working copy with real content from Git LFS files.
git-lfs-dedup(1)
    De-duplicate Git LFS files.
git-lfs-env(1)
    Display the Git LFS environment.
git-lfs-ext(1)
    Display Git LFS extension details.
git-lfs-fetch(1)
    Download Git LFS files from a remote.
git-lfs-fsck(1)
    Check Git LFS files for consistency.
git-lfs-install(1)
    Install Git LFS configuration.
git-lfs-lock(1)
    Set a file as "locked" on the Git LFS server.
git-lfs-locks(1)
    List currently "locked" files from the Git LFS server.
git-lfs-logs(1)
    Show errors from the Git LFS command.
git-lfs-ls-files(1)
    Show information about Git LFS files in the index and working tree.
git-lfs-migrate(1)
    Migrate history to or from Git LFS.
git-lfs-prune(1)
    Delete old Git LFS files from local storage.
git-lfs-pull(1)
    Fetch Git LFS changes from the remote & checkout any required working
    tree files.
git-lfs-push(1)
    Push queued large files to the Git LFS endpoint.
git-lfs-status(1)
    Show the status of Git LFS files in the working tree.
git-lfs-track(1)
    View or add Git LFS paths to Git attributes.
git-lfs-uninstall(1)
    Uninstall Git LFS by removing hooks and smudge/clean filter configuration.
git-lfs-unlock(1)
    Remove "locked" setting for a file on the Git LFS server.
git-lfs-untrack(1)
    Remove Git LFS paths from Git Attributes.
git-lfs-update(1)
    Update Git hooks for the current Git repository.
git-lfs-version(1)
    Report the version number.

Low level plumbing commands

git-lfs-clean(1)
    Git clean filter that converts large files to pointers.
git-lfs-filter-process(1)
    Git process filter that converts between large files and pointers.
git-lfs-merge-driver(1)
    Merge text-based LFS files.
git-lfs-pointer(1)
    Build and compare pointers.
git-lfs-post-checkout(1)
    Git post-checkout hook implementation.
git-lfs-post-commit(1)
    Git post-commit hook implementation.
git-lfs-post-merge(1)
    Git post-merge hook implementation.
git-lfs-pre-push(1)
    Git pre-push hook implementation.
git-lfs-smudge(1)
    Git smudge filter that converts pointers in blobs to the actual content.
git-lfs-standalone-file(1)
    Git LFS standalone transfer adapter for file URLs (local paths).
EXAMPLES

To get started with Git LFS, the following commands can be used.

  1. Set up Git LFS on your system. You only have to do this once per
     repository per machine:

         git lfs install

  2. Choose the type of files you want to track, for example all ISO images,
     with git-lfs-track(1):

         git lfs track "*.iso"

  3. The above stores this information in gitattributes(5) files, so that
     file needs to be added to the repository:

         git add .gitattributes

  4. Commit, push and work with the files normally:

         git add file.iso
         git commit -m "Add disk image"
         git push
+ + diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 new file mode 100644 index 0000000000000000000000000000000000000000..30291fc1ca608cecc75c233afb3dd715d8954e8d --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-checkout.1 @@ -0,0 +1,50 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CHECKOUT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-checkout\fR \- Update working copy with file content if available +.SH "SYNOPSIS" +\fBgit lfs checkout\fR [\fIglob\-pattern\fR\|\.\|\.\|\.] +.br +\fBgit lfs checkout\fR \-\-to \fIfile\fR {\-\-base|\-\-ours|\-\-theirs} \fIconflict\-obj\-path\fR +.SH "DESCRIPTION" +Try to ensure that the working copy contains file content for Git LFS objects for the current ref, if the object data is available\. Does not download any content; see git\-lfs\-fetch(1) for that\. +.P +Checkout scans the current ref for all LFS objects that would be required, then where a file is either missing in the working copy, or contains placeholder pointer content with the same SHA, the real file content is written, provided we have it in the local store\. Modified files are never overwritten\. +.P +One or more \fIglob\-pattern\fRs may be provided as arguments to restrict the set of files that are updated\. Glob patterns are matched as per the format described in gitignore(5)\. +.P +When used with \fB\-\-to\fR and the working tree is in a conflicted state due to a merge, this option checks out one of the three stages a conflicting Git LFS object into a separate file (which can be outside of the work tree)\. This can make using diff tools to inspect and resolve merges easier\. A single Git LFS object\'s file path must be provided in \fIconflict\-obj\-path\fR\. +.SH "OPTIONS" +.TP +\fB\-\-base\fR +Check out the merge base of the specified file\. +.TP +\fB\-\-ours\fR +Check out our side (that of the current branch) of the conflict for the specified file\. +.TP +\fB\-\-theirs\fR +Check out their side (that of the other branch) of the conflict for the specified file\. +.TP +\fB\-\-to\fR \fIpath\fR +If the working tree is in a conflicted state, check out the portion of the conflict specified by \fB\-\-base\fR, \fB\-\-ours\fR, or \fB\-\-theirs\fR to the given path\. +.SH "EXAMPLES" +.TP +Checkout all files that are missing or placeholders: + +.P +\fB$ git lfs checkout\fR +.TP +Checkout a specific couple of files: + +.P +\fB$ git lfs checkout path/to/file1\.png path/to\.file2\.png\fR +.TP +Checkout a path with a merge conflict into separate files: + +.P +``` # Attempt merge with a branch that has a merge conflict $ git merge conflicting\-branch CONFLICT (content): Merge conflict in path/to/conflicting/file\.dat$ git lfs checkout ours\.dat \-\-ours path/to/conflicting/file\.dat $ git lfs checkout theirs\.dat \-\-theirs path/to/conflicting/file\.dat# then resolve conflict (e\.g\., by choosing one version over # the other, or creating a new version)$ rm ours\.dat theirs\.dat $ git add path/to/conflicting/file\.dat $ git merge \-\-continue ``` +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-pull(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clean.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clean.1 new file mode 100644 index 0000000000000000000000000000000000000000..0b1cfdcd8e98342a7c426afd51b2e16dc3a312bb --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clean.1 @@ -0,0 +1,17 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CLEAN" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-clean\fR \- Git clean filter that converts large files to pointers +.SH "SYNOPSIS" +\fBgit lfs clean\fR \fIpath\fR +.SH "DESCRIPTION" +Read the contents of a large file from standard input, and write a Git LFS pointer file for that file to standard output\. +.P +Clean is typically run by Git\'s clean filter, configured by the repository\'s Git attributes\. +.P +Clean is not part of the user\-facing Git plumbing commands\. To preview the pointer of a large file as it would be generated, see the git\-lfs\-pointer(1) command\. +.SH "SEE ALSO" +git\-lfs\-install(1), git\-lfs\-push(1), git\-lfs\-pointer(1), gitattributes(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clone.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clone.1 new file mode 100644 index 0000000000000000000000000000000000000000..494c75ea395e7605969a46742614e5fe38927fa9 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-clone.1 @@ -0,0 +1,34 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CLONE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-clone\fR \- Efficiently clone a LFS\-enabled repository +.SH "SYNOPSIS" +\fBgit lfs clone\fR [git clone options] \fIrepository\fR [\fIdirectory\fR] +.SH "DESCRIPTION" +Clone an LFS enabled Git repository more efficiently by disabling LFS during the git clone, then performing a \'git lfs pull\' directly afterwards\. +.P +\'git lfs clone\' also installs all of the repo\-level hooks (\.git/hooks) that LFS requires to operate\. If \fB\-\-separate\-git\-dir\fR is given, the hooks will be installed there\. +.P +This is faster than a regular \'git clone\' because that will download LFS content using the smudge filter, which is executed individually per file in the working copy\. This is relatively inefficient compared to the batch mode and parallel downloads performed by \'git lfs pull\'\. +.SH "OPTIONS" +All options supported by \'git clone\' +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-\-skip\-repo\fR +Skip installing repo\-level hooks (\.git/hooks) that LFS requires\. Disabled by default\. +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. 
+.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. Setting either option to an empty string clears the value\. +.SH "SEE ALSO" +git\-clone(1), git\-lfs\-pull(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 new file mode 100644 index 0000000000000000000000000000000000000000..6358c0aa9d665e7a93b1a1b0ff252e4f472f15d1 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-dedup.1 @@ -0,0 +1,15 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-DEDUP" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-dedup\fR \- Deduplicate Git LFS files +.SH "SYNOPSIS" +\fBgit lfs dedup\fR +.SH "DESCRIPTION" +Deduplicates storage by re\-creating working tree files as clones of the files in the Git LFS storage directory using the operating system\'s copy\-on\-write file creation functionality\. +.P +If the operating system or file system don\'t support copy\-on\-write file creation, this command exits unsuccessfully\. +.P +This command will also exit without success if any Git LFS extensions are configured, as these will typically be used to alter the file contents before they are written to the Git LFS storage directory, and therefore the working tree files should not be copy\-on\-write clones of the LFS object files\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-env.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-env.1 new file mode 100644 index 0000000000000000000000000000000000000000..084a669ef873ca78a9d25f9dfe800cf980b7622c --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-env.1 @@ -0,0 +1,11 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-ENV" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-env\fR \- Display the Git LFS environment +.SH "SYNOPSIS" +\fBgit lfs env\fR +.SH "DESCRIPTION" +Display the current Git LFS environment\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ext.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ext.1 new file mode 100644 index 0000000000000000000000000000000000000000..1b16271c0a79d674abc5fefab38b646022b9ae88 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ext.1 @@ -0,0 +1,21 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-EXT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-ext\fR \- View extension details +.SH "SYNOPSIS" +\fBgit lfs ext list\fR [\fIname\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Git LFS extensions enable the manipulation of files streams during smudge and clean\. +.SH "EXAMPLES" +.IP "\[ci]" 4 +List details for all extensions +.IP +\fBgit lfs ext\fR \fBgit lfs ext list\fR +.IP "\[ci]" 4 +List details for the specified extensions +.IP +\fBgit lfs ext list \'foo\' \'bar\'\fR +.IP "" 0 +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. 
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 new file mode 100644 index 0000000000000000000000000000000000000000..490dbc8658520c488284bf1b0502696797d6a8d0 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fetch.1 @@ -0,0 +1,100 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FETCH" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-fetch\fR \- Download all Git LFS files for a given ref +.SH "SYNOPSIS" +\fBgit lfs fetch\fR [options] [\fIremote\fR [\fIref\fR\|\.\|\.\|\.]] +.SH "DESCRIPTION" +Download Git LFS objects at the given refs from the specified remote\. See \fIDEFAULT REMOTE\fR and \fIDEFAULT REFS\fR for what happens if you don\'t specify\. +.P +This does not update the working copy\. +.SH "OPTIONS" +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Specify lfs\.fetchinclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Specify lfs\.fetchexclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-\-recent\fR +Download objects referenced by recent branches & commits in addition to those that would otherwise be downloaded\. See \fIRECENT CHANGES\fR +.TP +\fB\-\-all\fR +Download all objects that are referenced by any commit reachable from the refs provided as arguments\. If no refs are provided, then all refs are fetched\. This is primarily for backup and migration purposes\. Cannot be combined with \-\-recent or \-\-include/\-\-exclude\. Ignores any globally configured include and exclude paths to ensure that all objects are downloaded\. +.TP +\fB\-\-prune\fR \fB\-p\fR +Prune old and unreferenced objects after fetching, equivalent to running \fBgit lfs prune\fR afterwards\. See git\-lfs\-prune(1) for more details\. +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. +.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. Setting either option to an empty string clears the value\. +.SS "Examples" +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "textures,images/foo*"\fR +.IP +This will only fetch objects referenced in paths in the textures folder, and files called foo* in the images folder +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "*\.jpg,*\.png,*\.tga"\fR +.IP +Only fetch JPG/PNG/TGA files, wherever they are in the repository +.IP "\[ci]" 4 +\fBgit config lfs\.fetchexclude "media/reallybigfiles"\fR +.IP +Don\'t fetch any LFS objects referenced in the folder media/reallybigfiles, but fetch everything else +.IP "\[ci]" 4 +\fBgit config lfs\.fetchinclude "media"\fR +.br +\fBgit config lfs\.fetchexclude "media/excessive"\fR +.IP +Only fetch LFS objects in the \'media\' folder, but exclude those in one of its subfolders\. 
+.IP "" 0 +.SH "DEFAULT REMOTE" +Without arguments, fetch downloads from the default remote\. The default remote is the same as for \fBgit fetch\fR, i\.e\. based on the remote branch you\'re tracking first, or origin otherwise\. +.SH "DEFAULT REFS" +If no refs are given as arguments, the currently checked out ref is used\. In addition, if enabled, recently changed refs and commits are also included\. See \fIRECENT CHANGES\fR for details\. +.SH "RECENT CHANGES" +If the \fB\-\-recent\fR option is specified, or if the gitconfig option \fBlfs\.fetchrecentalways\fR is true, then after the current ref (or those in the arguments) is fetched, we also search for \'recent\' changes to fetch objects for, so that it\'s more convenient to checkout or diff those commits without incurring further downloads\. +.P +What changes are considered \'recent\' is based on a number of gitconfig options: +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR If non\-zero, includes branches which have commits within N days of the current date\. Only local refs are included unless lfs\.fetchrecentremoterefs is true\. The default is 7 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentremoterefs\fR If true, fetches remote refs (for the remote you\'re fetching) as well as local refs in the recent window\. This is useful to fetch objects for remote branches you might want to check out later\. The default is true; if you set this to false, fetching for those branches will only occur when you either check them out (losing the advantage of fetch \-\-recent), or create a tracking local branch separately then fetch again\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentcommitsdays\fR In addition to fetching at branches, also fetches changes made within N days of the latest commit on the branch\. This is useful if you\'re often reviewing recent changes\. The default is 0 (no previous changes)\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentalways\fR Always operate as if \-\-recent was provided on the command line\. +.IP "" 0 +.SH "EXAMPLES" +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref from default remote +.IP +\fBgit lfs fetch\fR +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref AND recent changes from default remote +.IP +\fBgit lfs fetch \-\-recent\fR +.IP "\[ci]" 4 +Fetch the LFS objects for the current ref from a secondary remote \'upstream\' +.IP +\fBgit lfs fetch upstream\fR +.IP "\[ci]" 4 +Fetch all the LFS objects from the default remote that are referenced by any commit in the \fBmain\fR and \fBdevelop\fR branches +.IP +\fBgit lfs fetch \-\-all origin main develop\fR +.IP "\[ci]" 4 +Fetch the LFS objects for a branch from origin +.IP +\fBgit lfs fetch origin mybranch\fR +.IP "\[ci]" 4 +Fetch the LFS objects for 2 branches and a commit from origin +.IP +\fBgit lfs fetch origin main mybranch e445b45c1c9c6282614f201b62778e4c0688b5c8\fR +.IP "" 0 +.SH "SEE ALSO" +git\-lfs\-checkout(1), git\-lfs\-pull(1), git\-lfs\-prune(1), gitconfig(5)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 new file mode 100644 index 0000000000000000000000000000000000000000..537b01830dc7d170df1ca739c9ede9f8d364bc8f --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-filter-process.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FILTER\-PROCESS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-filter\-process\fR \- Git filter process that converts between pointer and actual content +.SH "SYNOPSIS" +\fBgit lfs filter\-process\fR \fBgit lfs filter\-process \-\-skip\fR +.SH "DESCRIPTION" +Implement the Git process filter API, exchanging handshake messages and then accepting and responding to requests to either clean or smudge a file\. +.P +filter\-process is always run by Git\'s filter process, and is configured by the repository\'s Git attributes\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. Git LFS pointer files that are not replaced with the contents of their corresponding object files are simply copied to standard output without change\. +.P +The filter process uses Git\'s pkt\-line protocol to communicate, and is documented in detail in gitattributes(5)\. +.SH "OPTIONS" +Without any options, filter\-process accepts and responds to requests normally\. +.TP +\fB\-\-skip\fR +Skip automatic downloading of objects on clone or pull\. +.TP +\fBGIT_LFS_SKIP_SMUDGE\fR +Disables the smudging process\. For more, see: git\-lfs\-config(5)\. +.SH "SEE ALSO" +git\-lfs\-clean(1), git\-lfs\-install(1), git\-lfs\-smudge(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 new file mode 100644 index 0000000000000000000000000000000000000000..04992190a2e95d751a8d9f7de4666dba2e99b93b --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-fsck.1 @@ -0,0 +1,28 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-FSCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-fsck\fR \- Check GIT LFS files for consistency +.SH "SYNOPSIS" +\fBgit lfs fsck\fR [options] [revisions] +.SH "DESCRIPTION" +Checks all Git LFS files in the current HEAD for consistency\. +.P +Corrupted files are moved to "\.git/lfs/bad"\. +.P +The revisions may be specified as either a single committish, in which case only that commit is inspected; specified as a range of the form \fBA\.\.B\fR (and only this form), in which case that range is inspected; or omitted entirely, in which case HEAD (and, for \-\-objects, the index) is examined\. +.P +The default is to perform all checks\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set \fBlfs\.fetchexclude\fR to a comma\-separated list of paths\. 
If \fBlfs\.fetchexclude\fR is defined, then any Git LFS files whose paths match one in that list will not be checked for consistency\. Paths are matched using wildcard matching as per gitignore(5)\. +.SH "OPTIONS" +.TP +\fB\-\-objects\fR +Check that each object in HEAD matches its expected hash and that each object exists on disk\. +.TP +\fB\-\-pointers\fR +Check that each pointer is canonical and that each file which should be stored as a Git LFS file is so stored\. +.SH "SEE ALSO" +git\-lfs\-ls\-files(1), git\-lfs\-status(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-install.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-install.1 new file mode 100644 index 0000000000000000000000000000000000000000..eb70f36efe9034a73df5b396b5443f06efc29b21 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-install.1 @@ -0,0 +1,41 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-INSTALL" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-install\fR \- Install Git LFS configuration\. +.SH "SYNOPSIS" +\fBgit lfs install\fR \fIoptions\fR +.SH "DESCRIPTION" +Perform the following actions to ensure that Git LFS is setup properly: +.IP "\[ci]" 4 +Set up the clean and smudge filters under the name "lfs" in the global Git config\. +.IP "\[ci]" 4 +Install a pre\-push hook to run git\-lfs\-pre\-push(1) for the current repository, if run from inside one\. If "core\.hooksPath" is configured in any Git configuration (and supported, i\.e\., the installed Git version is at least 2\.9\.0), then the pre\-push hook will be installed to that directory instead\. +.IP "" 0 +.SH "OPTIONS" +Without any options, \fBgit lfs install\fR will only setup the "lfs" smudge and clean filters if they are not already set\. +.TP +\fB\-\-force\fR +Sets the "lfs" smudge and clean filters, overwriting existing values\. +.TP +\fB\-\-local\fR +Sets the "lfs" smudge and clean filters in the local repository\'s git config, instead of the global git config (~/\.gitconfig)\. +.TP +\fB\-\-worktree\fR +Sets the "lfs" smudge and clean filters in the current working tree\'s git config, instead of the global git config (~/\.gitconfig) or local repository\'s git config ($GIT_DIR/config)\. If multiple working trees are in use, the Git config extension \fBworktreeConfig\fR must be enabled to use this option\. If only one working tree is in use, \fB\-\-worktree\fR has the same effect as \fB\-\-local\fR\. This option is only available if the installed Git version is at least 2\.20\.0 and therefore supports the "worktreeConfig" extension\. +.TP +\fB\-\-manual\fR +Print instructions for manually updating your hooks to include git\-lfs functionality\. Use this option if \fBgit lfs install\fR fails because of existing hooks and you want to retain their functionality\. +.TP +\fB\-\-system\fR +Sets the "lfs" smudge and clean filters in the system git config, e\.g\. /etc/gitconfig instead of the global git config (~/\.gitconfig)\. +.TP +\fB\-\-skip\-smudge\fR +Skips automatic downloading of objects on clone or pull\. This requires a manual "git lfs pull" every time a new commit is checked out on your repository\. +.TP +\fB\-\-skip\-repo\fR +Skips setup of the local repo; use if you want to install the global lfs filters but not make changes to the current repo\. +.SH "SEE ALSO" +git\-lfs\-uninstall(1), git\-worktree(1)\. +.P +Part of the git\-lfs(1) suite\. 
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-lock.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-lock.1 new file mode 100644 index 0000000000000000000000000000000000000000..33ac16393211a7a0a64a4899eab1fbb4f19922fd --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-lock.1 @@ -0,0 +1,22 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-lock\fR \- Set a file as "locked" on the Git LFS server +.SH "SYNOPSIS" +\fBgit lfs lock\fR \fIoptions\fR \fIpath\fR +.SH "DESCRIPTION" +Sets the given file path as "locked" against the Git LFS server, with the intention of blocking attempts by other users to update the given path\. Locking a file requires the file to exist in the working copy\. +.P +Once locked, LFS will verify that Git pushes do not modify files locked by other users\. See the description of the \fBlfs\.\.locksverify\fR config key in git\-lfs\-config(5) for details\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. +.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-unlock(1), git\-lfs\-locks(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-locks.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-locks.1 new file mode 100644 index 0000000000000000000000000000000000000000..3d651a0bc3ea7c6561e60fc5c3744ed686639b2c --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-locks.1 @@ -0,0 +1,38 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOCKS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-locks\fR \- Lists currently locked files from the Git LFS server\. +.SH "SYNOPSIS" +\fBgit lfs locks\fR \fIoptions\fR +.SH "DESCRIPTION" +Lists current locks from the Git LFS server\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. +.TP +\fB\-i \fR \fB\-\-id=\fR +Specifies a lock by its ID\. Returns a single result\. +.TP +\fB\-p \fR \fB\-\-path=\fR +Specifies a lock by its path\. Returns a single result\. +.TP +\fB\-\-local\fR +Lists only our own locks which are cached locally\. Skips a remote call\. +.TP +\fB\-\-cached\fR +Lists cached locks from the last remote call\. Contrary to \-\-local, this will include locks of other users as well\. This option is intended to display the last known locks in case you are offline\. There is no guarantee that locks on the server have not changed in the meanwhile\. +.TP +\fB\-\-verify\fR +Verifies the lock owner on the server and marks our own locks by \'O\'\. Own locks are actually held by us and corresponding files can be updated for the next push\. All other locks are held by someone else\. Contrary to \-\-local, this option will also detect locks which are held by us despite no local lock information being available (e\.g\. because the file had been locked from a different clone); it will also detect \'broken\' locks (e\.g\. if someone else has forcefully unlocked our files)\. +.TP +\fB\-l \fR \fB\-\-limit=\fR +Specifies number of results to return\. 
+.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-lock(1), git\-lfs\-unlock(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-logs.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-logs.1 new file mode 100644 index 0000000000000000000000000000000000000000..de5e867b5ad1dfbf065290693c71c365946b1f47 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-logs.1 @@ -0,0 +1,30 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LOGS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-logs\fR \- Show errors from the git\-lfs command +.SH "SYNOPSIS" +\fBgit lfs logs\fR +.br +\fBgit lfs logs\fR \fIfile\fR +.br +\fBgit lfs logs clear\fR +.br +\fBgit lfs logs boomtown\fR +.br +.SH "DESCRIPTION" +Display errors from the git\-lfs command\. Any time it crashes, the details are saved to "\.git/lfs/logs"\. +.SH "COMMANDS" +.TP +\fBclear\fR +Clears all of the existing logged errors\. +.TP +\fBboomtown\fR +Triggers a dummy exception\. +.SH "OPTIONS" +Without any options, \fBgit lfs logs\fR simply shows the list of error logs\. +.TP +\fIfile\fR +Shows the specified error log\. Use "last" to show the most recent error\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 new file mode 100644 index 0000000000000000000000000000000000000000..e94ee72c0d366b190014106058566dcaa211f331 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-ls-files.1 @@ -0,0 +1,42 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-LS\-FILES" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-ls\-files\fR \- Show information about Git LFS files in the index and working tree +.SH "SYNOPSIS" +\fBgit lfs ls\-files\fR [\fIref\fR] +.br +\fBgit lfs ls\-files\fR \fIref\fR \fIref\fR +.SH "DESCRIPTION" +Display paths of Git LFS files that are found in the tree at the given reference\. If no reference is given, scan the currently checked\-out branch\. If two references are given, the LFS files that are modified between the two references are shown; deletions are not listed\. +.P +An asterisk (*) after the OID indicates a full object, a minus (\-) indicates an LFS pointer\. +.SH "OPTIONS" +.TP +\fB\-l\fR \fB\-\-long\fR +Show the entire 64 character OID, instead of just first 10\. +.TP +\fB\-s\fR \fB\-\-size\fR +Show the size of the LFS object between parenthesis at the end of a line\. +.TP +\fB\-d\fR \fB\-\-debug\fR +Show as much information as possible about a LFS file\. This is intended for manual inspection; the exact format may change at any time\. +.TP +\fB\-a\fR \fB\-\-all\fR +Inspects the full history of the repository, not the current HEAD (or other provided reference)\. This will include previous versions of LFS objects that are no longer found in the current tree\. +.TP +\fB\-\-deleted\fR +Shows the full history of the given reference, including objects that have been deleted\. +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Include paths matching only these patterns; see [FETCH SETTINGS]\. +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Exclude paths matching any of these patterns; see [FETCH SETTINGS]\. 
+.TP +\fB\-n\fR \fB\-\-name\-only\fR +Show only the lfs tracked file names\. ## SEE ALSO +.P +git\-lfs\-status(1), git\-lfs\-config(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 new file mode 100644 index 0000000000000000000000000000000000000000..33e9acade6891be5eb7daba0080ccc6ca7a7b3cf --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-merge-driver.1 @@ -0,0 +1,58 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-MERGE\-DRIVER" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-merge\-driver\fR \- Merge text\-based LFS files +.SH "SYNOPSIS" +\fBgit lfs merge\-driver\fR \fIoptions\fR +.SH "DESCRIPTION" +Merge text files stored in Git LFS using the default Git merge machinery, or a custom merge driver if specified\. Note that this, in general, does not support partial renames or copies because Git does not support them in this case\. +.P +This program is intended to be invoked automatically by Git and not by users manually\. See \fICONFIGURATION\fR for details on the configuration required for that\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-ancestor\fR \fIpath\fR Specify the file containing the ancestor revision\. +.IP "\[ci]" 4 +\fB\-\-current\fR \fIpath\fR Specify the file containing the current revision\. +.IP "\[ci]" 4 +\fB\-\-marker\-size\fR \fInum\fR Specify the conflict marker size as an integer\. +.IP "\[ci]" 4 +\fB\-\-other\fR \fIpath\fR Specify the file containing the other revision\. +.IP "\[ci]" 4 +\fB\-\-program\fR \fIprogram\fR Specify a command, which is passed to the shell after substitution, that performs the actual merge\. If this is not specified, \fBgit merge\-file\fR is invoked with appropriate arguments to perform the merge of the file\. +.IP +See \fICONFIGURATION\fR for the sequences which are substituted here\. +.IP "" 0 +.SH "CONFIGURATION" +Git allows the use of a custom merge driver for files based on the \fBmerge\fR attribute set in \fB\.gitattributes\fR\. By default, when using \fBgit lfs track\fR, this value is set to \fBlfs\fR\. +.P +Because Git LFS can be used to store both text and binary files and it isn\'t always clear which behavior should be used, Git LFS does not enable this merge driver by default\. However, if you know that some or all of your files are text files, then you can set the \fBmerge\fR attribute for those files to \fBlfs\-text\fR and use \fBgit config\fR to set the merge driver like so: +.P +\fBconsole $ git config merge\.lfs\-text\.driver \'git lfs merge\-driver \-\-ancestor %O \-\-current %A \-\-other %B \-\-marker\-size %L \-\-output %A\'\fR +.P +This tells Git to invoke the custom Git LFS merge driver, which in turn uses Git\'s merge machinery, to merge files where the \fBmerge\fR attribute is set to \fBlfs\-text\fR\. Note that \fBlfs\-text\fR here is an example and any syntactically valid value can be used\. 
+.P +If you are using a special type of file that needs rules different from Git\'s standard merge machinery, you can also specify the \fB\-\-program\fR option, which is passed to \fBsh\fR after substituting its own percent\-encoded escapes: +.IP "\[ci]" 4 +\fB%A\fR: the current version +.IP "\[ci]" 4 +\fB%B\fR: the other version +.IP "\[ci]" 4 +\fB%D\fR: the destination version +.IP "\[ci]" 4 +\fB%O\fR: the ancestor version +.IP "\[ci]" 4 +\fB%L\fR: the conflict marker size +.IP "" 0 +.P +Note that the percent sign must typically be doubled to prevent Git from substituting its own values here\. Therefore, specifying the default behavior explicitly looks like this: +.P +\fBconsole $ git config merge\.lfs\-text\.driver \e \'git lfs merge\-driver \-\-ancestor %O \-\-current %A \-\-other %B \-\-marker\-size %L \-\-output %A \-\-program \'\e\'\'git merge\-file \-\-stdout \-\-marker\-size=%%L %%A %%O %%B >%%D\'\e\'\'\'\fR +.P +The exit status from the custom command should be zero on success or non\-zero on conflicts or other failure\. +.P +Note that if no merge driver is specified for the value of the \fBmerge\fR attribute (as is the case by default with \fBmerge=lfs\fR), then the default Git merge strategy is used\. For LFS files, this means that Git will try to merge the pointer files, which usually is not useful\. +.SH "SEE ALSO" +git\-merge(1), git\-merge\-file(1), gitattributes(5) +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 new file mode 100644 index 0000000000000000000000000000000000000000..87be8e2b8822d511b649400b2927ed8eca7f6187 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-migrate.1 @@ -0,0 +1,191 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-MIGRATE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-migrate\fR \- Migrate history to or from Git LFS +.SH "SYNOPSIS" +\fBgit lfs migrate\fR \fImode\fR [options] [\-\-] [branch \|\.\|\.\|\.] +.SH "DESCRIPTION" +Convert files in a Git repository to or from Git LFS pointers, or summarize Git file sizes by file type\. The \fBimport\fR mode converts Git files (i\.e\., blobs) to Git LFS, while the \fBexport\fR mode does the reverse, and the \fBinfo\fR mode provides an informational summary which may be useful in deciding which files to import or export\. +.P +In all modes, by default \fBgit lfs migrate\fR operates only on the currently checked\-out branch, and only on files (of any size and type) added in commits which do not exist on any remote\. Multiple options are available to override these defaults\. +.P +When converting files to or from Git LFS, the \fBgit lfs migrate\fR command will only make changes to your local repository and working copy, never any remotes\. This is intentional as the \fBimport\fR and \fBexport\fR modes are generally "destructive" in the sense that they rewrite your Git history, changing commits and generating new commit SHAs\. (The exception is the "no\-rewrite" \fBimport\fR sub\-mode; see \fIIMPORT WITHOUT REWRITING HISTORY\fR for details\.) +.P +You should therefore always first commit or stash any uncommitted work before using the \fBimport\fR or \fBexport\fR modes, and then validate the result of the migration before pushing the changes to your remotes, for instance by running the \fBinfo\fR mode and by examining your rewritten commit history\. 
+.P +Once you are satisfied with the changes, you will need to force\-push the new Git history of any rewritten branches to all your remotes\. This is a step which should be taken with care, since you will be altering the Git history on your remotes\. +.P +To examine or modify files in branches other than the currently checked\-out one, branch refs may be specified directly, or provided in one or more \fB\-\-include\-ref\fR options\. They may also be excluded by prefixing them with \fB^\fR or providing them in \fB\-\-exclude\-ref\fR options\. Use the \fB\-\-everything\fR option to specify that all refs should be examined, including all remote refs\. See \fIINCLUDE AND EXCLUDE REFERENCES\fR for details\. +.P +For the \fBinfo\fR and \fBimport\fR modes, all file types are considered by default; while useful in the \fBinfo\fR mode, this is often not desirable when importing, so either filename patterns (pathspecs) or the \fB\-\-fixup\fR option should normally be specified in that case\. (At least one include pathspec is required for the \fBexport\fR mode\.) Pathspecs may be defined using the \fB\-\-include\fR and \fB\-\-exclude\fR options (\fB\-I\fR and \fB\-X\fR for short), as described in \fIINCLUDE AND EXCLUDE\fR\. +.P +As typical Git LFS usage depends on tracking specific file types using filename patterns defined in \fB\.gitattributes\fR files, the \fBgit lfs migrate\fR command will examine, create, and modify \fB\.gitattributes\fR files as necessary\. The \fB\.gitattributes\fR files will always be assigned the default read/write permissions mode (i\.e\., without execute permissions)\. Any symbolic links with that name will cause the migration to halt prematurely\. +.P +The \fBimport\fR mode (see \fIIMPORT\fR) will convert Git objects of the file types specified (e\.g\., with \fB\-\-include\fR) to Git LFS pointers, and will add entries for those file types to \fB\.gitattributes\fR files, creating those files if they do not exist\. The result should be as if \fBgit lfs track\fR commands had been run at the points in your Git history corresponding to where each type of converted file first appears\. The exception is if the \fB\-\-fixup\fR option is given, in which case the \fBimport\fR mode will only examine any existing \fB\.gitattributes\fR files and then convert Git objects which should be tracked by Git LFS but are not yet\. +.P +The \fBexport\fR mode (see \fIEXPORT\fR) works as the reverse operation to the \fBimport\fR mode, converting any Git LFS pointers that match the file types specified with \fB\-\-include\fR, which must be given at least once\. Note that \fB\.gitattributes\fR entries will not be removed, nor will the files; instead, the \fBexport\fR mode inserts "do not track" entries similar to those created by the \fBgit lfs untrack\fR command\. The \fB\-\-remote\fR option is available in the \fBexport\fR mode to specify the remote from which Git LFS objects should be fetched if they do not exist in the local Git LFS object cache; if not provided, \fBorigin\fR is used by default\. +.P +The \fBinfo\fR mode (see \fIINFO\fR) summarizes by file type (i\.e\., by filename extension) the total number and size of files in a repository\. Note that like the other two modes, by default the \fBinfo\fR mode operates only on the currently checked\-out branch and only on commits which do not exist on any remote, so to get a summary of the entire repository across all branches, use the \fB\-\-everything\fR option\. 
If objects have already been converted to Git LFS pointers, then by default the size of the referenced objects is totaled and reported separately\. You may also choose to ignore them by using \fB\-\-pointers=ignore\fR or to treat the pointers as files by using \fB\-\-pointers=no\-follow\fR\. (The latter option is akin to how existing Git LFS pointers were handled by the \fBinfo\fR mode in prior versions of Git LFS)\. +.P +When using the \fB\-\-everything\fR option, take note that it means all refs (local and remote) will be considered, but not necessarily all file types\. The \fBimport\fR and \fBinfo\fR modes consider all file types by default, although the \fB\-\-include\fR and \fB\-\-exclude\fR options constrain this behavior\. Also note that after importing across all branches with the \fB\-\-everything\fR option (and then checking to ensure the results are satisfactory!) it may be convenient to update multiple branches on your remotes by using the \fB\-\-all\fR option to \fBgit push\fR\. +.P +Unless the \fB\-\-skip\-fetch\fR option is given, \fBgit lfs migrate\fR always begins by fetching updated lists of refs from all the remotes returned by \fBgit remote\fR, but as noted above, after making changes to your local Git history while converting objects, it will never automatically push those changes to your remotes\. +.SH "MODES" +.IP "\[ci]" 4 +\fBinfo\fR Show information about repository size\. See \fIINFO\fR\. +.IP "\[ci]" 4 +\fBimport\fR Convert Git objects to Git LFS pointers\. See \fIIMPORT\fR and \fIIMPORT WITHOUT REWRITING HISTORY\fR\. +.IP "\[ci]" 4 +\fBexport\fR Convert Git LFS pointers to Git objects\. See \fIEXPORT\fR\. +.IP "" 0 +.SH "OPTIONS" +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR\. +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +See \fIINCLUDE AND EXCLUDE\fR\. +.TP +\fB\-\-include\-ref\fR=\fIrefname\fR +See \fIINCLUDE AND EXCLUDE REFERENCES\fR\. +.TP +\fB\-\-exclude\-ref\fR=\fIrefname\fR +See \fIINCLUDE AND EXCLUDE REFERENCES\fR\. +.TP +\fB\-\-skip\-fetch\fR +Assumes that the known set of remote references is complete, and should not be refreshed when determining the set of "un\-pushed" commits to migrate\. Has no effect when combined with \fB\-\-include\-ref\fR or \fB\-\-exclude\-ref\fR\. +.TP +\fB\-\-everything\fR +See \fIINCLUDE AND EXCLUDE REFERENCES\fR\. +.IP +Note: Git refs are "case\-sensitive" on all platforms in "packed from" (see \fBgit\-pack\-refs(1)\fR)\. On "case\-insensitive" file systems, e\.g\. NTFS on Windows or default APFS on macOS, \fBgit\-lfs\-migrate(1)\fR would only migrate the first ref if two or more refs are equal except for upper/lower case letters\. +.TP +\fB\-\-yes\fR +Assume a yes answer to any prompts, permitting noninteractive use\. Currently, the only such prompt is the one asking whether to overwrite (destroy) any working copy changes\. Thus, specifying this option may cause data loss if you are not careful\. +.TP +[branch \|\.\|\.\|\.] +Migrate only the set of branches listed\. If not given, \fBgit\-lfs\-migrate(1)\fR will migrate the currently checked out branch\. +.IP +References beginning with \fB^\fR will be excluded, whereas branches that do not begin with \fB^\fR will be included\. +.IP +If any of \fB\-\-include\-ref\fR or \fB\-\-exclude\-ref\fR are given, the checked out branch will not be appended, but branches given explicitly will be appended\. +.SS "INFO" +The \fBinfo\fR mode summarizes the sizes of file objects present in the Git history\. 
It supports all the core \fBmigrate\fR options and these additional ones: +.IP "\[ci]" 4 +\fB\-\-above=\fR Only count files whose individual filesize is above the given size\. \fBsize\fR may be specified as a number of bytes, or a number followed by a storage unit, e\.g\., "1b", "20 MB", "3 TiB", etc\. +.IP +If a set of files sharing a common extension has no files in that set whose individual size is above the given \fB\-\-above\fR no files no entry for that set will be shown\. +.IP "\[ci]" 4 +\fB\-\-top=\fR Only display the top \fBn\fR entries, ordered by how many total files match the given pathspec\. The default is to show only the top 5 entries\. When existing Git LFS objects are found, an extra, separate "LFS Objects" line is output in addition to the top \fBn\fR entries, unless the \fB\-\-pointers\fR option is used to change this behavior\. +.IP "\[ci]" 4 +\fB\-\-unit=\fR Format the number of bytes in each entry as a quantity of the storage unit provided\. Valid units include: * b, kib, mib, gib, tib, pib \- for IEC storage units * b, kb, mb, gb, tb, pb \- for SI storage units +.IP +If a \fB\-\-unit\fR is not specified, the largest unit that can fit the number of counted bytes as a whole number quantity is chosen\. +.IP "\[ci]" 4 +\fB\-\-pointers=[follow|no\-follow|ignore]\fR Treat existing Git LFS pointers in the history according to one of three alternatives\. In the default \fBfollow\fR case, if any pointers are found, an additional separate "LFS Objects" line item is output which summarizes the total number and size of the Git LFS objects referenced by pointers\. In the \fBignore\fR case, any pointers are simply ignored, while the \fBno\-follow\fR case replicates the behavior of the \fBinfo\fR mode in older Git LFS versions and treats any pointers it finds as if they were regular files, so the output totals only include the contents of the pointers, not the contents of the objects to which they refer\. +.IP "\[ci]" 4 +\fB\-\-fixup\fR Infer \fB\-\-include\fR and \fB\-\-exclude\fR filters on a per\-commit basis based on the \fB\.gitattributes\fR files in a repository\. In practice, this option counts any filepaths which should be tracked by Git LFS according to the repository\'s \fB\.gitattributes\fR file(s), but aren\'t already pointers\. The \fB\.gitattributes\fR files are not reported, in contrast to the normal output of the \fBinfo\fR mode\. This option is incompatible with explicitly given \fB\-\-include\fR, \fB\-\-exclude\fR filters and with any \fB\-\-pointers\fR setting other than \fBignore\fR, hence \fB\-\-fixup\fR implies \fB\-\-pointers=ignore\fR if it is not explicitly set\. +.IP "" 0 +.P +The format of the output shows the filename pattern, the total size of the file objects (excluding those below the \fB\-\-above\fR threshold, if one was defined), and the ratio of the number of files above the threshold to the total number of files; this ratio is also shown as a percentage\. For example: +.P +\fB*\.gif 93 MB 9480/10504 files(s) 90% *\.png 14 MB 1732/1877 files(s) 92%\fR +.P +By default only the top five entries are shown, but \fB\-\-top\fR allows for more or fewer to be output as desired\. +.SS "IMPORT" +The \fBimport\fR mode migrates objects present in the Git history to pointer files tracked and stored with Git LFS\. It supports all the core \fBmigrate\fR options and these additional ones: +.IP "\[ci]" 4 +\fB\-\-verbose\fR Print the commit oid and filename of migrated files to STDOUT\. 
+.IP "\[ci]" 4 +\fB\-\-above=\fR Only migrate files whose individual filesize is above the given size\. \fBsize\fR may be specified as a number of bytes, or a number followed by a storage unit, e\.g\., "1b", "20 MB", "3 TiB", etc\. This option cannot be used with the \fB\-\-include\fR, \fB\-\-exclude\fR, and \fB\-\-fixup\fR options\. +.IP "\[ci]" 4 +\fB\-\-object\-map=\fR Write to \fBpath\fR a file with the mapping of each rewritten commits\. The file format is CSV with this pattern: \fBOLD\-SHA\fR,\fBNEW\-SHA\fR +.IP "\[ci]" 4 +\fB\-\-no\-rewrite\fR Migrate objects to Git LFS in a new commit without rewriting Git history\. Please note that when this option is used, the \fBmigrate import\fR command will expect a different argument list, specialized options will become available, and the core \fBmigrate\fR options will be ignored\. See \fIIMPORT WITHOUT REWRITING HISTORY\fR\. +.IP "\[ci]" 4 +\fB\-\-fixup\fR Infer \fB\-\-include\fR and \fB\-\-exclude\fR filters on a per\-commit basis based on the \fB\.gitattributes\fR files in a repository\. In practice, this option imports any filepaths which should be tracked by Git LFS according to the repository\'s \fB\.gitattributes\fR file(s), but aren\'t already pointers\. This option is incompatible with explicitly given \fB\-\-include\fR, \fB\-\-exclude\fR filters\. +.IP "" 0 +.P +If \fB\-\-no\-rewrite\fR is not provided and \fB\-\-include\fR or \fB\-\-exclude\fR (\fB\-I\fR, \fB\-X\fR, respectively) are given, the \fB\.gitattributes\fR will be modified to include any new filepath patterns as given by those flags\. +.P +If \fB\-\-no\-rewrite\fR is not provided and neither of those flags are given, the gitattributes will be incrementally modified to include new filepath extensions as they are rewritten in history\. +.SS "IMPORT WITHOUT REWRITING HISTORY" +The \fBimport\fR mode has a special sub\-mode enabled by the \fB\-\-no\-rewrite\fR flag\. This sub\-mode will migrate objects to pointers as in the base \fBimport\fR mode, but will do so in a new commit without rewriting Git history\. When using this sub\-mode, the base \fBmigrate\fR options, such as \fB\-\-include\-ref\fR, will be ignored, as will those for the base \fBimport\fR mode\. The \fBmigrate\fR command will also take a different argument list\. As a result of these changes, \fB\-\-no\-rewrite\fR will only operate on the current branch \- any other interested branches must have the generated commit merged in\. +.P +The \fB\-\-no\-rewrite\fR sub\-mode supports the following options and arguments: +.IP "\[ci]" 4 +\fB\-m \-\-message=\fR Specifies a commit message for the newly created commit\. +.IP "\[ci]" 4 +[file \|\.\|\.\|\.] The list of files to import\. These files must be tracked by patterns specified in the gitattributes\. +.IP "" 0 +.P +If \fB\-\-message\fR is given, the new commit will be created with the provided message\. If no message is given, a commit message will be generated based on the file arguments\. +.SS "EXPORT" +The \fBexport\fR mode migrates Git LFS pointer files present in the Git history out of Git LFS, converting them into their corresponding object files\. It supports all the core \fBmigrate\fR options and these additional ones: +.IP "\[ci]" 4 +\fB\-\-verbose\fR Print the commit oid and filename of migrated files to STDOUT\. +.IP "\[ci]" 4 +\fB\-\-object\-map=\fR Write to \fBpath\fR a file with the mapping of each rewritten commit\. 
The file format is CSV with this pattern: \fBOLD\-SHA\fR,\fBNEW\-SHA\fR +.IP "\[ci]" 4 +\fB\-\-remote=\fR Download LFS objects from the provided \fBgit\-remote\fR during the export\. If not provided, defaults to \fBorigin\fR\. +.IP "" 0 +.P +The \fBexport\fR mode requires at minimum a pattern provided with the \fB\-\-include\fR argument to specify which files to export\. Files matching the \fB\-\-include\fR patterns will be removed from Git LFS, while files matching the \fB\-\-exclude\fR patterns will retain their Git LFS status\. The export command will modify the \fB\.gitattributes\fR to set/unset any filepath patterns as given by those flags\. +.SH "INCLUDE AND EXCLUDE" +You can specify that \fBgit lfs migrate\fR should only convert files whose pathspec matches the \fB\-\-include\fR glob patterns and does not match the \fB\-\-exclude\fR glob patterns, either to reduce total migration time or to only migrate part of your repo\. Multiple patterns may be given using commas as delimiters\. +.P +Pattern matching is done so as to be functionally equivalent to the pattern matching format of \fB\.gitattributes\fR\. In addition to simple file extension matches (e\.g\., \fB*\.gif\fR) patterns may also specify directory paths, in which case the \fBpath/**\fR format may be used to match recursively\. +.P +Note that this form of pattern matching for the \fB\-\-include\fR and \fB\-\-exclude\fR options used by the \fBgit lfs migrate\fR command is unique among the suite of \fBgit lfs\fR commands\. Other commands which also take these options, such as \fBgit lfs ls\-files\fR, use the gitignore(5) form of pattern matching instead\. +.SH "INCLUDE AND EXCLUDE REFERENCES" +You can specify that \fBgit lfs migrate\fR should only convert files added in commits reachable from certain references, namely those defined using one or more \fB\-\-include\-ref\fR options, and should ignore files in commits reachable from references defined in \fB\-\-exclude\-ref\fR options\. +.P +\fBD\-\-\-E\-\-\-F / \e A\-\-\-B\-\-\-\-\-\-C refs/heads/my\-feature \e \e \e refs/heads/main \e refs/remotes/origin/main\fR +.P +In the above configuration, the following commits are reachable by each ref: +.P +\fBrefs/heads/main: C, B, A refs/heads/my\-feature: F, E, D, B, A refs/remote/origin/main: A\fR +.P +The following \fBgit lfs migrate\fR options would, therefore, include commits F, E, D, C, and B, but exclude commit A: +.P +\fB\-\-include\-ref=refs/heads/my\-feature \-\-include\-ref=refs/heads/main \-\-exclude\-ref=refs/remotes/origin/main\fR +.P +The presence of flag \fB\-\-everything\fR indicates that all local and remote references should be migrated\. +.SH "EXAMPLES" +.SS "Migrate unpushed commits" +A common use case for the migrate command is to convert large Git objects to LFS before pushing your commits\. By default, it only scans commits that don\'t exist on any remote, so long as the repository is non\-bare\. 
+.P
+First, run \fBgit lfs migrate info\fR to list the file types taking up the most space in your repository:
+.P
+\fB$ git lfs migrate info migrate: Fetching remote refs: \|\.\|\.\|\., done migrate: Sorting commits: \|\.\|\.\|\., done migrate: Examining commits: 100% (1/1), done *\.mp3 284 MB 1/1 files(s) 100% *\.pdf 42 MB 8/8 files(s) 100% *\.psd 9\.8 MB 15/15 files(s) 100% *\.ipynb 6\.9 MB 6/6 files(s) 100% *\.csv 5\.8 MB 2/2 files(s) 100%\fR
+.P
+Now, you can run \fBgit lfs migrate import\fR to convert some file types to LFS:
+.P
+\fB$ git lfs migrate import \-\-include="*\.mp3,*\.psd" migrate: Fetching remote refs: \|\.\|\.\|\., done migrate: Sorting commits: \|\.\|\.\|\., done migrate: Rewriting commits: 100% (1/1), done main d2b959babd099fe70da1c1512e2475e8a24de163 \-> 136e706bf1ae79643915c134e17a6c933fd53c61 migrate: Updating refs: \|\.\|\.\|\., done\fR
+.P
+If after conversion you find that some files in your working directory have been replaced with Git LFS pointers, this is normal, and the working copies of these files can be repopulated with their full expected contents by using \fBgit lfs checkout\fR\.
+.SS "Migrate local history"
+You can also migrate the entire history of your repository:
+.P
+```
+# Check for large files and existing Git LFS objects in your local main branch
+$ git lfs migrate info \-\-include\-ref=main
+
+# Check for large files and existing Git LFS objects in every branch
+$ git lfs migrate info \-\-everything
+
+# Check for large files in every branch, ignoring Git LFS objects,
+# and listing the top 100 or fewer results
+$ git lfs migrate info \-\-everything \-\-pointers=ignore \-\-top=100
+```
+.P
+The same flags will work in \fBimport\fR mode:
+.P
+```
+# Convert all zip files in your main branch
+$ git lfs migrate import \-\-include\-ref=main \-\-include="*\.zip"
+
+# Convert all zip files in every local branch
+$ git lfs migrate import \-\-everything \-\-include="*\.zip"
+
+# Convert all files over 100K in every local branch
+$ git lfs migrate import \-\-everything \-\-above=100Kb
+```
+.P
+Note: This will require a force\-push to any existing Git remotes\. Using the \fB\-\-all\fR option when force\-pushing may be convenient if many refs were updated, e\.g\., after importing to Git LFS with the \fB\-\-everything\fR option\.
+.SS "Migrate without rewriting local history"
+You can also migrate files without modifying the existing history of your repository\. Note that in the examples below, files in subdirectories are not included because they are not explicitly specified\.
+.P
+Without a specified commit message:
+.P
+\fB$ git lfs migrate import \-\-no\-rewrite test\.zip *\.mp3 *\.psd\fR
+.P
+With a specified commit message:
+.P
+\fB$ git lfs migrate import \-\-no\-rewrite \e \-m "Import test\.zip, \.mp3, \.psd files in root of repo" \e test\.zip *\.mp3 *\.psd\fR
+.SH "SEE ALSO"
+git\-lfs\-checkout(1), git\-lfs\-ls\-files(1), git\-lfs\-track(1), git\-lfs\-untrack(1), gitattributes(5), gitignore(5)\.
+.P
+Part of the git\-lfs(1) suite\.
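The --fixup, --object-map, and --yes options described under IMPORT above are not exercised by the examples on this page; the following sketch shows one way they might be combined (the mapping file name commit-map.csv is illustrative):

```
# Re-import files that .gitattributes says should be tracked by Git LFS but
# are still stored as regular Git blobs, across all local and remote refs,
# recording the old-to-new commit mapping in an illustrative CSV file
$ git lfs migrate import --everything --fixup --object-map=commit-map.csv --yes

# After reviewing the rewritten history, update all remote refs in one pass
$ git push --force --all origin
```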
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 new file mode 100644 index 0000000000000000000000000000000000000000..36be15c942cbcc3297d47ffc28bfdf9410d81ae0 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pointer.1 @@ -0,0 +1,34 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POINTER" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-pointer\fR \- Build, compare, and check pointers +.SH "SYNOPSIS" +\fBgit lfs pointer \-\-file=path/to/file\fR +.br +\fBgit lfs pointer \-\-file=path/to/file \-\-pointer=path/to/pointer\fR +.br +\fBgit lfs pointer \-\-file=path/to/file \-\-stdin\fR \fBgit lfs pointer \-\-check \-\-file=path/to/file\fR +.SH "Description" +Builds and optionally compares generated pointer files to ensure consistency between different Git LFS implementations\. +.SH "OPTIONS" +.TP +\fB\-\-file\fR +A local file to build the pointer from\. +.TP +\fB\-\-pointer\fR +A local file including the contents of a pointer generated from another implementation\. This is compared to the pointer generated from \fB\-\-file\fR\. +.TP +\fB\-\-stdin\fR +Reads the pointer from STDIN to compare with the pointer generated from \fB\-\-file\fR\. +.TP +\fB\-\-check\fR +Reads the pointer from STDIN (if \fB\-\-stdin\fR is given) or the filepath (if \fB\-\-file\fR) is given\. If neither or both of \fB\-\-stdin\fR and \fB\-\-file\fR are given, the invocation is invalid\. Exits 0 if the data read is a valid Git LFS pointer\. Exits 1 otherwise\. +.TP +\fB\-\-strict\fR: + +.TP +\fB\-\-no\-strict\fR +In conjunction with \fB\-\-check\fR, \fB\-\-strict\fR verifies that the pointer is canonical; that is, it would be the one created by Git LFS\. If it is not, exits 2\. The default, for backwards compatibility, is \fB\-\-no\-strict\fR, but this may change in a future version\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 new file mode 100644 index 0000000000000000000000000000000000000000..a1ec45e409080fb62e3b6243cfdeee45e317d53d --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-checkout.1 @@ -0,0 +1,13 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-POST\-CHECKOUT" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-post\-checkout\fR \- Git post\-checkout hook implementation +.SH "SYNOPSIS" +\fBgit lfs post\-checkout\fR \fIrev_before\fR \fIref_after\fR \fIis_branch_checkout\fR +.SH "DESCRIPTION" +Responds to Git post\-checkout events\. It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\. +.SH "SEE ALSO" +git\-lfs\-track(1) +.P +Part of the git\-lfs(1) suite\. 
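To make the pointer workflow above concrete, here is a brief usage sketch; video.mp4 and other.ptr are illustrative file names:

```
# Build a pointer for a local file and print it to standard output
$ git lfs pointer --file=video.mp4

# Compare the generated pointer with one produced by another implementation
$ git lfs pointer --file=video.mp4 --pointer=other.ptr

# Check that a file contains a valid, canonical Git LFS pointer
$ git lfs pointer --check --strict --file=other.ptr
```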
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1
new file mode 100644
index 0000000000000000000000000000000000000000..447b79970102bd1632da9cdd34099c168a142ce6
--- /dev/null
+++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-commit.1
@@ -0,0 +1,15 @@
+.\" generated with Ronn-NG/v0.9.1
+.\" http://github.com/apjanke/ronn-ng/tree/0.9.1
+.TH "GIT\-LFS\-POST\-COMMIT" "1" "May 2022" ""
+.SH "NAME"
+\fBgit\-lfs\-post\-commit\fR \- Git post\-commit hook implementation
+.SH "SYNOPSIS"
+\fBgit lfs post\-commit\fR
+.SH "DESCRIPTION"
+Responds to Git post\-commit events\. It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\.
+.P
+Where the \fBgit lfs post\-merge\fR command, which has a similar purpose, must examine all files in the working copy, \fBgit lfs post\-commit\fR can limit itself to checking only those files which have changed in \fBHEAD\fR\. It primarily handles newly added lockable files which have not yet been made read\-only\.
+.SH "SEE ALSO"
+git\-lfs\-post\-merge(1), git\-lfs\-track(1)
+.P
+Part of the git\-lfs(1) suite\.
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1
new file mode 100644
index 0000000000000000000000000000000000000000..c5ae4bdede5a6577be0461c533f89a33bf14e50c
--- /dev/null
+++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-post-merge.1
@@ -0,0 +1,13 @@
+.\" generated with Ronn-NG/v0.9.1
+.\" http://github.com/apjanke/ronn-ng/tree/0.9.1
+.TH "GIT\-LFS\-POST\-MERGE" "1" "May 2022" ""
+.SH "NAME"
+\fBgit\-lfs\-post\-merge\fR \- Git post\-merge hook implementation
+.SH "SYNOPSIS"
+\fBgit lfs post\-merge\fR \fIis_squash\fR
+.SH "DESCRIPTION"
+Responds to Git post\-merge events\. It makes sure that any files which are marked as lockable by \fBgit lfs track\fR are read\-only in the working copy, if not currently locked by the local user\.
+.SH "SEE ALSO"
+git\-lfs\-track(1)
+.P
+Part of the git\-lfs(1) suite\.
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1
new file mode 100644
index 0000000000000000000000000000000000000000..d030265f488ed507483f9356ace54a3a269b9912
--- /dev/null
+++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pre-push.1
@@ -0,0 +1,30 @@
+.\" generated with Ronn-NG/v0.9.1
+.\" http://github.com/apjanke/ronn-ng/tree/0.9.1
+.TH "GIT\-LFS\-PRE\-PUSH" "1" "May 2022" ""
+.SH "NAME"
+\fBgit\-lfs\-pre\-push\fR \- Git pre\-push hook implementation
+.SH "SYNOPSIS"
+\fBgit lfs pre\-push\fR \fIremote\fR [remoteurl]
+.SH "DESCRIPTION"
+Responds to Git pre\-push hook events\. It reads the range of commits from STDIN, in the following format:
+.IP "" 4
+.nf
+<local ref> SP <local sha1> SP <remote ref> SP <remote sha1> \en
+.fi
+.IP "" 0
+.P
+It also takes the remote name and URL as arguments\.
+.P
+If any of those Git objects are associated with Git LFS objects, those objects will be pushed to the Git LFS API\.
+.P
+In the case of pushing a new branch, the list of Git objects will be all of the Git objects in this branch\.
+.P
+In the case of deleting a branch, no attempts to push Git LFS objects will be made\.
+.SH "OPTIONS"
+.TP
+\fBGIT_LFS_SKIP_PUSH\fR
+Do nothing on pre\-push\. For more, see: git\-lfs\-config(5)\.
+.SH "SEE ALSO"
+git\-lfs\-clean(1), git\-lfs\-push(1)\.
+.P
+Part of the git\-lfs(1) suite\.
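As a usage sketch for the GIT_LFS_SKIP_PUSH variable documented above (the remote and branch names are illustrative):

```
# Push the branch without uploading its Git LFS objects
$ GIT_LFS_SKIP_PUSH=1 git push origin main

# Upload the corresponding Git LFS objects separately later
$ git lfs push origin main
```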
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-prune.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-prune.1 new file mode 100644 index 0000000000000000000000000000000000000000..5ac9759095f1d278bd2f3d3874fed214fb6e4c15 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-prune.1 @@ -0,0 +1,83 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PRUNE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-prune\fR \- Delete old LFS files from local storage +.SH "SYNOPSIS" +\fBgit lfs prune\fR \fIoptions\fR +.SH "DESCRIPTION" +Deletes local copies of LFS files which are old, thus freeing up disk space\. Prune operates by enumerating all the locally stored objects, and then deleting any which are not referenced by at least ONE of the following: +.IP "\[ci]" 4 +the current checkout +.IP "\[ci]" 4 +all existing stashes +.IP "\[ci]" 4 +a \'recent branch\'; see \fIRECENT FILES\fR +.IP "\[ci]" 4 +a \'recent commit\' on the current branch or recent branches; see \fIRECENT FILES\fR +.IP "\[ci]" 4 +a commit which has not been pushed; see \fIUNPUSHED LFS FILES\fR +.IP "\[ci]" 4 +any other worktree checkouts; see git\-worktree(1) +.IP "" 0 +.P +In general terms, prune will delete files you\'re not currently using and which are not \'recent\', so long as they\'ve been pushed i\.e\. the local copy is not the only one\. +.P +The reflog is not considered, only commits\. Therefore LFS objects that are only referenced by orphaned commits are always deleted\. +.P +Note: you should not run \fBgit lfs prune\fR if you have different repositories sharing the same custom storage directory; see git\-lfs\-config(5) for more details about \fBlfs\.storage\fR option\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set \fBlfs\.fetchexclude\fR to a comma\-separated list of paths\. If \fBlfs\.fetchexclude\fR is defined, then any Git LFS files whose paths match one in that list will be pruned unless they are referenced by a stash or an unpushed commit\. Paths are matched using wildcard matching as per gitignore(5)\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-dry\-run\fR \fB\-d\fR Don\'t actually delete anything, just report on what would have been done +.IP "\[ci]" 4 +\fB\-\-force\fR \fB\-f\fR Prune all objects except unpushed objects, including objects required for currently checked out refs\. Implies \fB\-\-recent\fR\. +.IP "\[ci]" 4 +\fB\-\-recent\fR Prune even objects that would normally be preserved by the configuration options specified below in \fIRECENT FILES\fR\. +.IP "\[ci]" 4 +\fB\-\-verify\-remote\fR \fB\-c\fR Contact the remote and check that copies of the files we would delete definitely exist before deleting\. See \fIVERIFY REMOTE\fR\. +.IP "\[ci]" 4 +\fB\-\-no\-verify\-remote\fR Disables remote verification if lfs\.pruneverifyremotealways was enabled in settings\. See \fIVERIFY REMOTE\fR\. +.IP "\[ci]" 4 +\fB\-\-verbose\fR \fB\-v\fR Report the full detail of what is/would be deleted\. +.IP "" 0 +.SH "RECENT FILES" +Prune won\'t delete LFS files referenced by \'recent\' commits, in case you want to use them again without having to download\. The definition of \'recent\' is derived from the one used by git\-lfs\-fetch(1) to download recent objects with the \fB\-\-recent\fR option, with an offset of a number of days (default 3) to ensure that we always keep files you download for a few days\. 
+.P +Here are the git\-config(1) settings that control this behaviour: +.IP "\[ci]" 4 +\fBlfs\.pruneoffsetdays\fR +.br +The number of extra days added to the fetch recent settings when using them to decide when to prune\. So for a reference to be considered old enough to prune, it has to be this many days older than the oldest reference that would be downloaded via \fBgit lfs fetch \-\-recent\fR\. Only used if the relevant fetch recent \'days\' setting is non\-zero\. Default 3 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR +.br +\fBlfs\.fetchrecentremoterefs\fR +.br +\fBlfs\.fetchrecentcommitsdays\fR +.br +These have the same meaning as git\-lfs\-fetch(1) with the \fB\-\-recent\fR option, they are used as a base for the offset above\. Anything which falls outside of this offsetted window is considered old enough to prune\. If a day value is zero, that condition is not used at all to retain objects and they will be pruned\. +.IP "" 0 +.SH "UNPUSHED LFS FILES" +When the only copy of an LFS file is local, and it is still reachable from any reference, that file can never be pruned, regardless of how old it is\. +.P +To determine whether an LFS file has been pushed, we check the difference between local refs and remote refs; where the local ref is ahead, any LFS files referenced in those commits is unpushed and will not be deleted\. This works because the LFS pre\-push hook always ensures that LFS files are pushed before the remote branch is updated\. +.P +See \fIDEFAULT REMOTE\fR, for which remote is considered \'pushed\' for pruning purposes\. +.SH "VERIFY REMOTE" +The \fB\-\-verify\-remote\fR option calls the remote to ensure that any LFS files to be deleted have copies on the remote before actually deleting them\. +.P +Usually the check performed by \fIUNPUSHED LFS FILES\fR is enough to determine that files have been pushed, but if you want to be extra sure at the expense of extra overhead you can make prune actually call the remote API and verify the presence of the files you\'re about to delete locally\. See \fIDEFAULT REMOTE\fR for which remote is checked\. +.P +You can make this behaviour the default by setting \fBlfs\.pruneverifyremotealways\fR to true\. +.P +In addition to the overhead of calling the remote, using this option also requires prune to distinguish between totally unreachable files (e\.g\. those that were added to the index but never committed, or referenced only by orphaned commits), and files which are still referenced, but by commits which are prunable\. This makes the prune process take longer\. +.SH "DEFAULT REMOTE" +When identifying \fIUNPUSHED LFS FILES\fR and performing \fIVERIFY REMOTE\fR, a single remote, \'origin\', is normally used as the reference\. This one remote is considered canonical; even if you use multiple remotes, you probably want to retain your local copies until they\'ve made it to that remote\. \'origin\' is used by default because that will usually be a main central repo, or your fork of it \- in both cases that\'s a valid remote backup of your work\. If origin doesn\'t exist then by default nothing will be pruned because everything is treated as \'unpushed\'\. +.P +You can alter the remote via git config: \fBlfs\.pruneremotetocheck\fR\. Set this to a different remote name to check that one instead of \'origin\'\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
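A short sketch tying together the prune options and settings described above; the configuration values shown simply restate the documented defaults:

```
# Report, in full detail, what would be deleted without removing anything
$ git lfs prune --dry-run --verbose

# Prune, but confirm each object exists on the remote before deleting it
$ git lfs prune --verify-remote

# Make remote verification the default and keep the documented 3-day offset
$ git config lfs.pruneverifyremotealways true
$ git config lfs.pruneoffsetdays 3
```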
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pull.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pull.1 new file mode 100644 index 0000000000000000000000000000000000000000..6a278668bd099ee70fd1a4868969a7ac80b547d6 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-pull.1 @@ -0,0 +1,32 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PULL" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-pull\fR \- Download all Git LFS files for current ref & checkout +.SH "SYNOPSIS" +\fBgit lfs pull\fR [options] [\fIremote\fR] +.SH "DESCRIPTION" +Download Git LFS objects for the currently checked out ref, and update the working copy with the downloaded content if required\. +.P +This is equivalent to running the following 2 commands: +.P +git lfs fetch [options] [\fIremote\fR] git lfs checkout +.SH "OPTIONS" +.TP +\fB\-I\fR \fIpaths\fR \fB\-\-include=\fR\fIpaths\fR +Specify lfs\.fetchinclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.TP +\fB\-X\fR \fIpaths\fR \fB\-\-exclude=\fR\fIpaths\fR +Specify lfs\.fetchexclude just for this invocation; see \fIINCLUDE AND EXCLUDE\fR +.SH "INCLUDE AND EXCLUDE" +You can configure Git LFS to only fetch objects to satisfy references in certain paths of the repo, and/or to exclude certain paths of the repo, to reduce the time you spend downloading things you do not use\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS objects will only be fetched if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS objects will only be fetched if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. +.P +Note that using the command\-line options \fB\-I\fR and \fB\-X\fR override the respective configuration settings\. Setting either option to an empty string clears the value\. +.SH "DEFAULT REMOTE" +Without arguments, pull downloads from the default remote\. The default remote is the same as for \fBgit pull\fR, i\.e\. based on the remote branch you\'re tracking first, or origin otherwise\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-checkout(1), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-push.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-push.1 new file mode 100644 index 0000000000000000000000000000000000000000..4e1be28a7c81fc849f1dc95a4f18e186c38c8c30 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-push.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-PUSH" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-push\fR \- Push queued large files to the Git LFS endpoint +.SH "SYNOPSIS" +\fBgit lfs push\fR \fIoptions\fR \fIremote\fR [\fIref\fR\|\.\|\.\|\.] +.br +\fBgit lfs push\fR \fIremote\fR [\fIref\fR\|\.\|\.\|\.] +.br +\fBgit lfs push\fR \-\-object\-id \fIremote\fR [\fIoid\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Upload Git LFS files to the configured endpoint for the current Git remote\. By default, it filters out objects that are already referenced by the local clone of the remote\. +.SH "OPTIONS" +.TP +\fB\-\-dry\-run\fR +Print the files that would be pushed, without actually pushing them\. 
+.TP +\fB\-\-all\fR +This pushes all objects to the remote that are referenced by any commit reachable from the refs provided as arguments\. If no refs are provided, then all local refs are pushed\. Note that this behavior differs from that of git\-lfs\-fetch(1) when its \fB\-\-all\fR option is used; in that case, all refs are fetched, including refs other than those under \fBrefs/heads\fR and \fBrefs/tags\fR\. If you are migrating a repository with these commands, make sure to run \fBgit lfs push\fR for any additional remote refs that contain Git LFS objects not reachable from your local refs\. +.TP +\fB\-\-object\-id\fR +This pushes only the object OIDs listed at the end of the command, separated by spaces\. +.SH "SEE ALSO" +git\-lfs\-fetch(1), git\-lfs\-pre\-push(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 new file mode 100644 index 0000000000000000000000000000000000000000..b59233e308de3485dabe1bb08d61befdb5f7fbbb --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-smudge.1 @@ -0,0 +1,27 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-SMUDGE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-smudge\fR \- Git smudge filter that converts pointer in blobs to the actual content +.SH "SYNOPSIS" +\fBgit lfs smudge\fR [\fIpath\fR] \fBgit lfs smudge\fR \-\-skip [\fIpath\fR] +.SH "DESCRIPTION" +Read a Git LFS pointer file from standard input and write the contents of the corresponding large file to standard output\. If needed, download the file\'s contents from the Git LFS endpoint\. The \fIpath\fR argument, if provided, is only used for a progress bar\. +.P +Smudge is typically run by Git\'s smudge filter, configured by the repository\'s Git attributes\. +.P +In your Git configuration or in a \fB\.lfsconfig\fR file, you may set either or both of \fBlfs\.fetchinclude\fR and \fBlfs\.fetchexclude\fR to comma\-separated lists of paths\. If \fBlfs\.fetchinclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path matches one in that list, and if \fBlfs\.fetchexclude\fR is defined, Git LFS pointer files will only be replaced with the contents of the corresponding Git LFS object file if their path does not match one in that list\. Paths are matched using wildcard matching as per gitignore(5)\. Git LFS pointer files that are not replaced with the contents of their corresponding object files are simply copied to standard output without change\. +.SH "OPTIONS" +Without any options, \fBgit lfs smudge\fR outputs the raw Git LFS content to standard output\. +.TP +\fB\-\-skip\fR +Skip automatic downloading of objects on clone or pull\. +.TP +\fBGIT_LFS_SKIP_SMUDGE\fR +Disables the smudging process\. For more, see: git\-lfs\-config(5)\. +.SH "KNOWN BUGS" +On Windows, Git before 2\.34\.0 does not handle files in the working tree larger than 4 gigabytes\. Newer versions of Git, as well as Unix versions, are unaffected\. +.SH "SEE ALSO" +git\-lfs\-install(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. 
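Combining the pull and push options documented above, a typical invocation might look like the following sketch; the path patterns, branch name, and object ID placeholder are illustrative:

```
# Fetch and check out only LFS objects under textures/, skipping raw/ captures
$ git lfs pull --include="textures/**" --exclude="raw/**" origin

# Preview which large files a push would upload, then push a single object
# by its SHA-256 OID (replace <oid> with a real object ID)
$ git lfs push --dry-run origin main
$ git lfs push --object-id origin <oid>
```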
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 new file mode 100644 index 0000000000000000000000000000000000000000..938f676785c8da51adb52fe29a0e515a13992fcb --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-standalone-file.1 @@ -0,0 +1,15 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-STANDALONE\-FILE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-standalone\-file\fR \- Standalone transfer adapter for file URLs +.SH "SYNOPSIS" +\fBgit lfs standalone\-file\fR +.SH "DESCRIPTION" +Provides a standalone transfer adapter for file URLs (local paths)\. +.P +By default, Git LFS requires the support of an HTTP server to implement the Git LFS protocol\. However, this tool allows the use of URLs starting with \fBfile:///\fR (that is, those representing local paths) in addition\. Configuration is not necessary; Git LFS handles this internally\. +.P +When invoked, this tool speaks JSON on input and output as a standalone transfer adapter\. It is not intended for use by end users\. +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-status.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-status.1 new file mode 100644 index 0000000000000000000000000000000000000000..efd627b3add046209edc4603a63d1bfcc653d50e --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-status.1 @@ -0,0 +1,29 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-STATUS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-status\fR \- Show the status of Git LFS files in the working tree +.SH "SYNOPSIS" +\fBgit lfs status\fR [\fIoptions\fR] +.SH "DESCRIPTION" +Display paths of Git LFS objects that +.IP "\[ci]" 4 +have not been pushed to the Git LFS server\. These are large files that would be uploaded by \fBgit push\fR\. +.IP "\[ci]" 4 +have differences between the index file and the current HEAD commit\. These are large files that would be committed by \fBgit commit\fR\. +.IP "\[ci]" 4 +have differences between the working tree and the index file\. These are files that could be staged using \fBgit add\fR\. +.IP "" 0 +.P +This command must be run in a non\-bare repository\. +.SH "OPTIONS" +.TP +\fB\-\-porcelain\fR +Give the output in an easy\-to\-parse format for scripts\. +.TP +\fB\-\-json\fR +Give the output in a stable json format for scripts\. +.SH "SEE ALSO" +git\-lfs\-ls\-files(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-track.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-track.1 new file mode 100644 index 0000000000000000000000000000000000000000..060599334a22337eb5d9c2e757a95194001f6b1b --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-track.1 @@ -0,0 +1,48 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-TRACK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-track\fR \- View or add Git LFS paths to Git attributes +.SH "SYNOPSIS" +\fBgit lfs track\fR [options] [\fIpattern\fR\|\.\|\.\|\.] +.SH "DESCRIPTION" +Start tracking the given patterns(s) through Git LFS\. The \fIpattern\fR argument is written to \.gitattributes\. If no paths are provided, simply list the currently\-tracked paths\. 
+.P +The gitattributes documentation \fIhttps://git\-scm\.com/docs/gitattributes\fR states that patterns use the gitignore pattern rules \fIhttps://git\-scm\.com/docs/gitignore\fR to match paths\. This means that patterns which contain asterisk (\fB*\fR), question mark (\fB?\fR), and the bracket characters (\fB[\fR and \fB]\fR) are treated specially; to disable this behavior and treat them literally instead, use \fB\-\-filename\fR or escape the character with a backslash\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-verbose\fR \fB\-v\fR: If enabled, have \fBgit lfs track\fR log files which it will touch\. Disabled by default\. +.IP "\[ci]" 4 +\fB\-\-dry\-run\fR \fB\-d\fR: If enabled, have \fBgit lfs track\fR log all actions it would normally take (adding entries to \.gitattributes, touching files on disk, etc) without performing any mutative operations to the disk\. +.IP +\fBgit lfs track \-\-dry\-run [files]\fR also implicitly mocks the behavior of passing the \fB\-\-verbose\fR, and will log in greater detail what it is doing\. +.IP +Disabled by default\. +.IP "\[ci]" 4 +\fB\-\-filename\fR Treat the arguments as literal filenames, not as patterns\. Any special glob characters in the filename will be escaped when writing the \fB\.gitattributes\fR file\. +.IP "\[ci]" 4 +\fB\-\-lockable\fR \fB\-l\fR Make the paths \'lockable\', meaning they should be locked to edit them, and will be made read\-only in the working copy when not locked\. +.IP "\[ci]" 4 +\fB\-\-not\-lockable\fR Remove the lockable flag from the paths so they are no longer read\-only unless locked\. +.IP "\[ci]" 4 +\fB\-\-no\-excluded\fR Do not list patterns that are excluded in the output; only list patterns that are tracked\. +.IP "\[ci]" 4 +\fB\-\-no\-modify\-attrs\fR Makes matched entries stat\-dirty so that Git can re\-index files you wish to convert to LFS\. Does not modify any \fB\.gitattributes\fR file(s)\. +.IP "" 0 +.SH "EXAMPLES" +.TP +List the patterns that Git LFS is currently tracking: + +.TP +Configure Git LFS to track GIF files: + +.TP +Configure Git LFS to track PSD files and make them read\-only unless locked: + +.TP +Configure Git LFS to track the file named \fBproject [1]\.psd\fR: + +.SH "SEE ALSO" +git\-lfs\-untrack(1), git\-lfs\-install(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 new file mode 100644 index 0000000000000000000000000000000000000000..9589a7239d364ac262b0dcd4222c5f930a73f3d1 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-uninstall.1 @@ -0,0 +1,31 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UNINSTALL" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-uninstall\fR \- Remove Git LFS configuration +.SH "SYNOPSIS" +\fBgit lfs uninstall\fR +.SH "DESCRIPTION" +Perform the following actions to remove the Git LFS configuration: +.IP "\[ci]" 4 +Remove the "lfs" clean and smudge filters from the global Git config\. +.IP "\[ci]" 4 +Uninstall the Git LFS pre\-push hook if run from inside a Git repository\. +.IP "" 0 +.SH "OPTIONS" +.TP +\-\-local +Removes the "lfs" smudge and clean filters from the local repository\'s git config, instead of the global git config (~/\.gitconfig)\. +.TP +\-\-worktree +Removes the "lfs" smudge and clean filters from the current working tree\'s git config, instead of the global git config (~/\.gitconfig) or local repository\'s git config ($GIT_DIR/config)\. 
If multiple working trees are in use, the Git config extension \fBworktreeConfig\fR must be enabled to use this option\. If only one working tree is in use, \fB\-\-worktree\fR has the same effect as \fB\-\-local\fR\. This option is only available if the installed Git version is at least 2\.20\.0 and therefore supports the "worktreeConfig" extension\. +.TP +\-\-system +Removes the "lfs" smudge and clean filters from the system git config, instead of the global git config (~/\.gitconfig)\. +.TP +\-\-skip\-repo +Skips cleanup of the local repo; use if you want to uninstall the global lfs filters but not make changes to the current repo\. +.SH "SEE ALSO" +git\-lfs\-install(1), git\-worktree(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 new file mode 100644 index 0000000000000000000000000000000000000000..f149a2e3a981f608d75418784584e385e6f1b5a8 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-unlock.1 @@ -0,0 +1,26 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UNLOCK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-unlock\fR \- Remove "locked" setting for a file on the Git LFS server +.SH "SYNOPSIS" +\fBgit lfs unlock\fR \fIOPTIONS\fR \fIpath\fR +.SH "DESCRIPTION" +Removes the given file path as "locked" on the Git LFS server\. Files must exist and have a clean git status before they can be unlocked\. The \fB\-\-force\fR flag will skip these checks\. +.SH "OPTIONS" +.TP +\fB\-r\fR \fIname\fR \fB\-\-remote=\fR\fIname\fR +Specify the Git LFS server to use\. Ignored if the \fBlfs\.url\fR config key is set\. +.TP +\fB\-f\fR \fB\-\-force\fR +Tells the server to remove the lock, even if it\'s owned by another user\. +.TP +\fB\-i \fR \fB\-\-id=\fR +Specifies a lock by its ID instead of path\. +.TP +\fB\-\-json\fR +Writes lock info as JSON to STDOUT if the command exits successfully\. Intended for interoperation with external tools\. If the command returns with a non\-zero exit code, plain text messages will be sent to STDERR\. +.SH "SEE ALSO" +git\-lfs\-lock(1), git\-lfs\-locks(1)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 new file mode 100644 index 0000000000000000000000000000000000000000..33ca6a3c4617907f0caf32cbdececf525fb1376b --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-untrack.1 @@ -0,0 +1,17 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UNTRACK" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-untrack\fR \- Remove Git LFS paths from Git Attributes +.SH "SYNOPSIS" +\fBgit lfs untrack\fR \fIpath\fR\|\.\|\.\|\. +.SH "DESCRIPTION" +Stop tracking the given path(s) through Git LFS\. The \fIpath\fR argument can be a glob pattern or a file path\. +.SH "EXAMPLES" +.TP +Configure Git LFS to stop tracking GIF files: + +.SH "SEE ALSO" +git\-lfs\-track(1), git\-lfs\-install(1), gitattributes(5)\. +.P +Part of the git\-lfs(1) suite\. 
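The EXAMPLES entries in the track and untrack pages above give descriptions without their command lines; commands matching those descriptions would look roughly like this (a sketch; the PSD file name comes from the description itself):

```
# List the patterns that Git LFS is currently tracking
$ git lfs track

# Configure Git LFS to track GIF files
$ git lfs track "*.gif"

# Track PSD files and make them read-only unless locked
$ git lfs track --lockable "*.psd"

# Track the literal file name "project [1].psd" rather than treating it as a glob
$ git lfs track --filename "project [1].psd"

# Stop tracking GIF files
$ git lfs untrack "*.gif"
```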
diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs-update.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-update.1 new file mode 100644 index 0000000000000000000000000000000000000000..cdb653987af842ed5241b0bc56e185c2406a91c6 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs-update.1 @@ -0,0 +1,20 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-UPDATE" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-update\fR \- Update Git hooks +.SH "SYNOPSIS" +.TS +allbox; +\fBgit lfs update\fR [\-\-manual \-\-force] +.TE +.SH "DESCRIPTION" +Updates the Git hooks used by Git LFS\. Silently upgrades known hook contents\. If you have your own custom hooks you may need to use one of the extended options below\. +.SH "OPTIONS" +.IP "\[ci]" 4 +\fB\-\-manual\fR \fB\-m\fR Print instructions for manually updating your hooks to include git\-lfs functionality\. Use this option if \fBgit lfs update\fR fails because of existing hooks and you want to retain their functionality\. +.IP "\[ci]" 4 +\fB\-\-force\fR \fB\-f\fR Forcibly overwrite any existing hooks with git\-lfs hooks\. Use this option if \fBgit lfs update\fR fails because of existing hooks but you don\'t care about their current contents\. +.IP "" 0 +.SH "SEE ALSO" +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-3.2.0/man/man1/git-lfs.1 b/out/merged/git-lfs-3.2.0/man/man1/git-lfs.1 new file mode 100644 index 0000000000000000000000000000000000000000..260b66788b06c32b839f810599cf421ac4396ca0 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man1/git-lfs.1 @@ -0,0 +1,149 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS" "1" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\fR \- Work with large files in Git repositories +.SH "SYNOPSIS" +\fBgit lfs\fR \fIcommand\fR [\fIargs\fR] +.SH "DESCRIPTION" +Git LFS is a system for managing and versioning large files in association with a Git repository\. Instead of storing the large files within the Git repository as blobs, Git LFS stores special "pointer files" in the repository, while storing the actual file contents on a Git LFS server\. The contents of the large file are downloaded automatically when needed, for example when a Git branch containing the large file is checked out\. +.P +Git LFS works by using a "smudge" filter to look up the large file contents based on the pointer file, and a "clean" filter to create a new version of the pointer file when the large file\'s contents change\. It also uses a \fBpre\-push\fR hook to upload the large file contents to the Git LFS server whenever a commit containing a new large file version is about to be pushed to the corresponding Git server\. +.SH "COMMANDS" +Like Git, Git LFS commands are separated into high level ("porcelain") commands and low level ("plumbing") commands\. +.SS "High level porcelain commands" +.TP +git\-lfs\-checkout(1) +Populate working copy with real content from Git LFS files\. +.TP +git\-lfs\-dedup(1) +De\-duplicate Git LFS files\. +.TP +git\-lfs\-env(1) +Display the Git LFS environment\. +.TP +git\-lfs\-ext(1) +Display Git LFS extension details\. +.TP +git\-lfs\-fetch(1) +Download Git LFS files from a remote\. +.TP +git\-lfs\-fsck(1) +Check Git LFS files for consistency\. +.TP +git\-lfs\-install(1) +Install Git LFS configuration\. +.TP +git\-lfs\-lock(1) +Set a file as "locked" on the Git LFS server\. +.TP +git\-lfs\-locks(1) +List currently "locked" files from the Git LFS server\. 
+.TP +git\-lfs\-logs(1) +Show errors from the Git LFS command\. +.TP +git\-lfs\-ls\-files(1) +Show information about Git LFS files in the index and working tree\. +.TP +git\-lfs\-migrate(1) +Migrate history to or from Git LFS +.TP +git\-lfs\-prune(1) +Delete old Git LFS files from local storage +.TP +git\-lfs\-pull(1) +Fetch Git LFS changes from the remote & checkout any required working tree files\. +.TP +git\-lfs\-push(1) +Push queued large files to the Git LFS endpoint\. +.TP +git\-lfs\-status(1) +Show the status of Git LFS files in the working tree\. +.TP +git\-lfs\-track(1) +View or add Git LFS paths to Git attributes\. +.TP +git\-lfs\-uninstall(1) +Uninstall Git LFS by removing hooks and smudge/clean filter configuration\. +.TP +git\-lfs\-unlock(1) +Remove "locked" setting for a file on the Git LFS server\. +.TP +git\-lfs\-untrack(1) +Remove Git LFS paths from Git Attributes\. +.TP +git\-lfs\-update(1) +Update Git hooks for the current Git repository\. +.TP +git\-lfs\-version(1) +Report the version number\. +.SS "Low level plumbing commands" +.TP +git\-lfs\-clean(1) +Git clean filter that converts large files to pointers\. +.TP +git\-lfs\-filter\-process(1) +Git process filter that converts between large files and pointers\. +.TP +git\-lfs\-merge\-driver(1) +Merge text\-based LFS files +.TP +git\-lfs\-pointer(1) +Build and compare pointers\. +.TP +git\-lfs\-post\-checkout(1) +Git post\-checkout hook implementation\. +.TP +git\-lfs\-post\-commit(1) +Git post\-commit hook implementation\. +.TP +git\-lfs\-post\-merge(1) +Git post\-merge hook implementation\. +.TP +git\-lfs\-pre\-push(1) +Git pre\-push hook implementation\. +.TP +git\-lfs\-smudge(1) +Git smudge filter that converts pointer in blobs to the actual content\. +.TP +git\-lfs\-standalone\-file(1) +Git LFS standalone transfer adapter for file URLs (local paths)\. +.SH "EXAMPLES" +To get started with Git LFS, the following commands can be used\. +.IP "1." 4 +Setup Git LFS on your system\. You only have to do this once per repository per machine: +.IP "" 4 +.nf +git lfs install +.fi +.IP "" 0 + +.IP "2." 4 +Choose the type of files you want to track, for examples all \fBISO\fR images, with git\-lfs\-track(1): +.IP "" 4 +.nf +git lfs track "*\.iso" +.fi +.IP "" 0 + +.IP "3." 4 +The above stores this information in gitattributes(5) files, so that file needs to be added to the repository: +.IP "" 4 +.nf +git add \.gitattributes +.fi +.IP "" 0 + +.IP "4." 4 +Commit, push and work with the files normally: +.IP "" 4 +.nf +git add file\.iso +git commit \-m "Add disk image" +git push +.fi +.IP "" 0 + +.IP "" 0 + diff --git a/out/merged/git-lfs-3.2.0/man/man5/git-lfs-config.5 b/out/merged/git-lfs-3.2.0/man/man5/git-lfs-config.5 new file mode 100644 index 0000000000000000000000000000000000000000..308f1622222dd48af76ff8664a0df36993b31b43 --- /dev/null +++ b/out/merged/git-lfs-3.2.0/man/man5/git-lfs-config.5 @@ -0,0 +1,291 @@ +.\" generated with Ronn-NG/v0.9.1 +.\" http://github.com/apjanke/ronn-ng/tree/0.9.1 +.TH "GIT\-LFS\-CONFIG" "5" "May 2022" "" +.SH "NAME" +\fBgit\-lfs\-config\fR \- Configuration options for git\-lfs +.SH "CONFIGURATION FILES" +git\-lfs reads its configuration from any file supported by \fBgit config \-l\fR, including all per\-repository, per\-user, and per\-system Git configuration files\. +.P +Additionally, a small number of settings can be specified in a file called \fB\.lfsconfig\fR at the root of the repository; see the "LFSCONFIG" section for more details\. 
This configuration file is useful for setting options such as the LFS URL or access type for all users of a repository, especially when these differ from the default\. The \fB\.lfsconfig\fR file uses the same format as \fB\.gitconfig\fR\. +.P +If the \fB\.lfsconfig\fR file is missing, the index is checked for a version of the file, and that is used instead\. If both are missing, \fBHEAD\fR is checked for the file\. If the repository is bare, only \fBHEAD\fR is checked\. This order may change for checkouts in the future to better match Git\'s behavior\. +.P +Settings from Git configuration files override the \fB\.lfsconfig\fR file\. This allows you to override settings like \fBlfs\.url\fR in your local environment without having to modify the \fB\.lfsconfig\fR file\. +.P +Most options regarding git\-lfs are contained in the \fB[lfs]\fR section, meaning they are all named \fBlfs\.foo\fR or similar, although occasionally an lfs option can be scoped inside the configuration for a remote\. +.SH "LIST OF OPTIONS" +.SS "General settings" +.IP "\[ci]" 4 +\fBlfs\.url\fR / \fBremote\.\.lfsurl\fR +.IP +The url used to call the Git LFS remote API\. Default blank (derive from clone URL)\. +.IP "\[ci]" 4 +\fBlfs\.pushurl\fR / \fBremote\.\.lfspushurl\fR +.IP +The url used to call the Git LFS remote API when pushing\. Default blank (derive from either LFS non\-push urls or clone url)\. +.IP "\[ci]" 4 +\fBremote\.lfsdefault\fR +.IP +The remote used to find the Git LFS remote API\. \fBlfs\.url\fR and \fBbranch\.*\.remote\fR for the current branch override this setting\. If this setting is not specified and there is exactly one remote, that remote is picked; otherwise, the default is \fBorigin\fR\. +.IP "\[ci]" 4 +\fBremote\.lfspushdefault\fR +.IP +The remote used to find the Git LFS remote API when pushing\. \fBlfs\.url\fR and \fBbranch\.*\.pushremote\fR for the current branch override this setting\. If this setting is not set, \fBremote\.pushdefault\fR is used, or if that is not set, the order of selection is used as specified in the \fBremote\.lfsdefault\fR above\. +.IP "\[ci]" 4 +\fBlfs\.dialtimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait to initiate a connection\. This does not include the time to send a request and wait for a response\. Default: 30 seconds +.IP "\[ci]" 4 +\fBlfs\.tlstimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait for a TLS handshake\. Default: 30 seconds\. +.IP "\[ci]" 4 +\fBlfs\.activitytimeout\fR / \fBlfs\.https://\.activitytimeout\fR +.IP +Sets the maximum time, in seconds, that the HTTP client will wait for the next tcp read or write\. If < 1, no activity timeout is used at all\. Default: 30 seconds +.IP "\[ci]" 4 +\fBlfs\.keepalive\fR +.IP +Sets the maximum time, in seconds, for the HTTP client to maintain keepalive connections\. Default: 30 minutes\. +.IP "\[ci]" 4 +\fBlfs\.ssh\.automultiplex\fR +.IP +When using the pure SSH\-based protocol, whether to multiplex requests over a single connection when possible\. This option requires the use of OpenSSH or a compatible SSH client\. Default: true\. +.IP "\[ci]" 4 +\fBlfs\.ssh\.retries\fR +.IP +Specifies the number of times Git LFS will attempt to obtain authorization via SSH before aborting\. Default: 5\. +.IP "\[ci]" 4 +\fBcore\.askpass\fR, GIT_ASKPASS +.IP +Given as a program and its arguments, this is invoked when authentication is needed against the LFS API\. The contents of stdout are interpreted as the password\. 
+.IP "\[ci]" 4 +\fBlfs\.cachecredentials\fR +.IP +Enables in\-memory SSH and Git Credential caching for a single \'git lfs\' command\. Default: enabled\. +.IP "\[ci]" 4 +\fBlfs\.storage\fR +.IP +Allow override LFS storage directory\. Non\-absolute path is relativized to inside of Git repository directory (usually \fB\.git\fR)\. +.IP +Note: you should not run \fBgit lfs prune\fR if you have different repositories sharing the same storage directory\. +.IP +Default: \fBlfs\fR in Git repository directory (usually \fB\.git/lfs\fR)\. +.IP "\[ci]" 4 +\fBlfs\.largefilewarning\fR +.IP +Warn when a file is 4 GiB or larger\. Such files will be corrupted when using Windows (unless smudging is disabled) with a Git for Windows version less than 2\.34\.0 due to a limitation in Git\. Default: true if the version is less than 2\.34\.0, false otherwise\. +.IP "" 0 +.SS "Upload and download transfer settings" +These settings control how the upload and download of LFS content occurs\. +.IP "\[ci]" 4 +\fBlfs\.concurrenttransfers\fR +.IP +The number of concurrent uploads/downloads\. Default 8\. +.IP "\[ci]" 4 +\fBlfs\.basictransfersonly\fR +.IP +If set to true, only basic HTTP upload/download transfers will be used, ignoring any more advanced transfers that the client/server may support\. This is primarily to work around bugs or incompatibilities\. +.IP +The git\-lfs client supports basic HTTP downloads, resumable HTTP downloads (using \fBRange\fR headers), and resumable uploads via tus\.io protocol\. Custom transfer methods can be added via \fBlfs\.customtransfer\fR (see next section)\. However setting this value to true limits the client to simple HTTP\. +.IP "\[ci]" 4 +\fBlfs\.tustransfers\fR +.IP +If set to true, this enables resumable uploads of LFS objects through the tus\.io API\. Once this feature is finalized, this setting will be removed, and tus\.io uploads will be available for all clients\. +.IP "\[ci]" 4 +\fBlfs\.standalonetransferagent\fR +.IP +Allows the specified custom transfer agent to be used directly for transferring files, without asking the server how the transfers should be made\. The custom transfer agent has to be defined in a \fBlfs\.customtransfer\.\fR settings group\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.path\fR +.IP +\fBlfs\.customtransfer\.\fR is a settings group which defines a custom transfer hook which allows you to upload/download via an intermediate process, using any mechanism you like (rather than just HTTP)\. \fBpath\fR should point to the process you wish to invoke\. The protocol between the git\-lfs client and the custom transfer process is documented at https://github\.com/git\-lfs/git\-lfs/blob/main/docs/custom\-transfers\.md +.IP +\fIname\fR must be a unique identifier that the LFS server understands\. When calling the LFS API the client will include a list of supported transfer types\. If the server also supports this named transfer type, it will select it and actions returned from the API will be in relation to that transfer type (may not be traditional URLs for example)\. Only if the server accepts \fIname\fR as a transfer it supports will this custom transfer process be invoked\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.args\fR +.IP +If the custom transfer process requires any arguments, these can be provided here\. This string will be expanded by the shell\. 
+.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.concurrent\fR +.IP +If true (the default), git\-lfs will invoke the custom transfer process multiple times in parallel, according to \fBlfs\.concurrenttransfers\fR, splitting the transfer workload between the processes\. +.IP "\[ci]" 4 +\fBlfs\.customtransfer\.\.direction\fR +.IP +Specifies which direction the custom transfer process supports, either "download", "upload", or "both"\. The default if unspecified is "both"\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxretries\fR +.IP +Specifies how many retries LFS will attempt per OID before marking the transfer as failed\. Must be an integer which is at least one\. If the value is not an integer, is less than one, or is not given, a value of eight will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxretrydelay\fR +.IP +Specifies the maximum time in seconds LFS will wait between each retry attempt\. LFS uses exponential backoff for retries, doubling the time between each retry until reaching this limit\. If a server requests a delay using the \fBRetry\-After\fR header, the header value overrides the exponential delay for that attempt and is not limited by this option\. +.IP +Must be an integer which is not negative\. Use zero to disable delays between retries unless requested by a server\. If the value is not an integer, is negative, or is not given, a value of ten will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.maxverifies\fR +.IP +Specifies how many verification requests LFS will attempt per OID before marking the transfer as failed, if the object has a verification action associated with it\. Must be an integer which is at least one\. If the value is not an integer, is less than one, or is not given, a default value of three will be used instead\. +.IP "\[ci]" 4 +\fBlfs\.transfer\.enablehrefrewrite\fR +.IP +If set to true, this enables rewriting href of LFS objects using \fBurl\.*\.insteadof/pushinsteadof\fR config\. \fBpushinsteadof\fR is used only for uploading, and \fBinsteadof\fR is used for downloading and for uploading when \fBpushinsteadof\fR is not set\. +.IP "" 0 +.SS "Push settings" +.IP "\[ci]" 4 +\fBlfs\.allowincompletepush\fR +.IP +When pushing, allow objects to be missing from the local cache without halting a Git push\. Default: false\. +.IP "" 0 +.SS "Fetch settings" +.IP "\[ci]" 4 +\fBlfs\.fetchinclude\fR +.IP +When fetching, only download objects which match any entry on this comma\-separated list of paths/filenames\. Wildcard matching is as per gitignore(5)\. See git\-lfs\-fetch(1) for examples\. +.IP "\[ci]" 4 +\fBlfs\.fetchexclude\fR +.IP +When fetching, do not download objects which match any item on this comma\-separated list of paths/filenames\. Wildcard matching is as per gitignore(5)\. See git\-lfs\-fetch(1) for examples\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentrefsdays\fR +.IP +If non\-zero, fetches refs which have commits within N days of the current date\. Only local refs are included unless lfs\.fetchrecentremoterefs is true\. Also used as a basis for pruning old files\. The default is 7 days\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentremoterefs\fR +.IP +If true, fetches remote refs (for the remote you\'re fetching) as well as local refs in the recent window\. This is useful to fetch objects for remote branches you might want to check out later\. 
The default is true; if you set this to false, fetching for those branches will only occur when you either check them out (losing the advantage of fetch \-\-recent), or create a tracking local branch separately then fetch again\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentcommitsdays\fR +.IP +In addition to fetching at refs, also fetches previous changes made within N days of the latest commit on the ref\. This is useful if you\'re often reviewing recent changes\. Also used as a basis for pruning old files\. The default is 0 (no previous changes)\. +.IP "\[ci]" 4 +\fBlfs\.fetchrecentalways\fR +.IP +Always operate as if \-\-recent was included in a \fBgit lfs fetch\fR call\. Default false\. +.IP "" 0 +.SS "Prune settings" +.IP "\[ci]" 4 +\fBlfs\.pruneoffsetdays\fR +.IP +The number of days added to the \fBlfs\.fetchrecent*\fR settings to determine what can be pruned\. Default is 3 days, i\.e\. that anything fetched at the very oldest edge of the \'recent window\' is eligible for pruning 3 days later\. +.IP "\[ci]" 4 +\fBlfs\.pruneremotetocheck\fR +.IP +Set the remote that LFS files must have been pushed to in order for them to be considered eligible for local pruning\. Also the remote which is called if \-\-verify\-remote is enabled\. +.IP "\[ci]" 4 +\fBlfs\.pruneverifyremotealways\fR +.IP +Always run \fBgit lfs prune\fR as if \fB\-\-verify\-remote\fR was provided\. +.IP "" 0 +.SS "Extensions" +.IP "\[ci]" 4 +\fBlfs\.extension\.\.\fR +.IP +Git LFS extensions enable the manipulation of files streams during smudge and clean\. \fBname\fR groups the settings for a single extension, and the settings are: * \fBclean\fR The command which runs when files are added to the index * \fBsmudge\fR The command which runs when files are written to the working copy * \fBpriority\fR The order of this extension compared to others +.IP "" 0 +.SS "Other settings" +.IP "\[ci]" 4 +\fBlfs\.\.access\fR +.IP +Note: this setting is normally set by LFS itself on receiving a 401 response (authentication required), you don\'t normally need to set it manually\. +.IP +If set to "basic" then credentials will be requested before making batch requests to this url, otherwise a public request will initially be attempted\. +.IP "\[ci]" 4 +\fBlfs\.\.locksverify\fR +.IP +Determines whether locks are checked before Git pushes\. This prevents you from pushing changes to files that other users have locked\. The Git LFS pre\-push hook varies its behavior based on the value of this config key\. +.IP "\[ci]" 4 +\fBnull\fR \- In the absence of a value, Git LFS will attempt the call, and warn if it returns an error\. If the response is valid, Git LFS will set the value to \fBtrue\fR, and will halt the push if the user attempts to update a file locked by another user\. If the server returns a \fB501 Not Implemented\fR response, Git LFS will set the value to \fBfalse\.\fR +.IP "\[ci]" 4 +\fBtrue\fR \- Git LFS will attempt to verify locks, halting the Git push if there are any server issues, or if the user attempts to update a file locked by another user\. +.IP "\[ci]" 4 +\fBfalse\fR \- Git LFS will completely skip the lock check in the pre\-push hook\. You should set this if you\'re not using File Locking, or your Git server verifies locked files on pushes automatically\. +.IP "" 0 +.IP +Supports URL config lookup as described in: https://git\-scm\.com/docs/git\-config#git\-config\-httplturlgt\. To set this value per\-host: \fBgit config \-\-global lfs\.https://github\.com/\.locksverify [true|false]\fR\. 
+.IP "\[ci]" 4 +\fBlfs\.\.contenttype\fR +.IP +Determines whether Git LFS should attempt to detect an appropriate HTTP \fBContent\-Type\fR header when uploading using the \'basic\' upload adapter\. If set to false, the default header of \fBContent\-Type: application/octet\-stream\fR is chosen instead\. Default: \'true\'\. +.IP "\[ci]" 4 +\fBlfs\.skipdownloaderrors\fR +.IP +Causes Git LFS not to abort the smudge filter when a download error is encountered, which allows actions such as checkout to work when you are unable to download the LFS content\. LFS files which could not download will contain pointer content instead\. +.IP +Note that this will result in git commands which call the smudge filter to report success even in cases when LFS downloads fail, which may affect scripts\. +.IP +You can also set the environment variable GIT_LFS_SKIP_DOWNLOAD_ERRORS=1 to get the same effect\. +.IP "\[ci]" 4 +\fBGIT_LFS_PROGRESS\fR +.IP +This environment variable causes Git LFS to emit progress updates to an absolute file\-path on disk when cleaning, smudging, or fetching\. +.IP +Progress is reported periodically in the form of a new line being appended to the end of the file\. Each new line will take the following format: +.IP +\fB / / \fR +.IP +Each field is described below: * \fBdirection\fR: The direction of transfer, either "checkout", "download", or "upload"\. * \fBcurrent\fR The index of the currently transferring file\. * \fBtotal files\fR The estimated count of all files to be transferred\. * \fBdownloaded\fR The number of bytes already downloaded\. * \fBtotal\fR The entire size of the file, in bytes\. * \fBname\fR The name of the file\. +.IP "\[ci]" 4 +\fBGIT_LFS_FORCE_PROGRESS\fR \fBlfs\.forceprogress\fR +.IP +Controls whether Git LFS will suppress progress status when the standard output stream is not attached to a terminal\. The default is \fBfalse\fR which makes Git LFS detect whether stdout is a terminal and suppress progress when it\'s not; you can disable this behaviour and force progress status even when standard output stream is not a terminal by setting either variable to 1, \'yes\' or \'true\'\. +.IP "\[ci]" 4 +\fBGIT_LFS_SKIP_SMUDGE\fR +.IP +Sets whether or not Git LFS will skip attempting to convert pointers of files tracked into their corresponding objects when checked out into a working copy\. If \'true\', \'1\', \'on\', or similar, Git LFS will skip the smudge process in both \fBgit lfs smudge\fR and \fBgit lfs filter\-process\fR\. If unset, or set to \'false\', \'0\', \'off\', or similar, Git LFS will smudge files as normal\. +.IP "\[ci]" 4 +\fBGIT_LFS_SKIP_PUSH\fR +.IP +Sets whether or not Git LFS will attempt to upload new Git LFS object in a pre\-push hook\. If \'true\', \'1\', \'on\', or similar, Git LFS will skip the pre\-push hook, so no new Git LFS objects will be uploaded\. If unset, or set to \'false\', \'0\', \'off\', or similar, Git LFS will proceed as normal\. +.IP "\[ci]" 4 +\fBGIT_LFS_SET_LOCKABLE_READONLY\fR \fBlfs\.setlockablereadonly\fR +.IP +These settings, the first an environment variable and the second a gitconfig setting, control whether files marked as \'lockable\' in \fBgit lfs track\fR are made read\-only in the working copy when not locked by the current user\. The default is \fBtrue\fR; you can disable this behaviour and have all files writeable by setting either variable to 0, \'no\' or \'false\'\. 
+.IP "\[ci]" 4 +\fBlfs\.lockignoredfiles\fR +.IP +This setting controls whether Git LFS will set ignored files that match the lockable pattern read only as well as tracked files\. The default is \fBfalse\fR; you can enable this behavior by setting the variable to 1, \'yes\', or \'true\'\. +.IP "\[ci]" 4 +\fBlfs\.defaulttokenttl\fR +.IP +This setting sets a default token TTL when git\-lfs\-authenticate does not include the TTL in the JSON response but still enforces it\. +.IP +Note that this is only necessary for larger repositories hosted on LFS servers that don\'t include the TTL\. +.IP "" 0 +.SH "LFSCONFIG" +The \.lfsconfig file in a repository is read and interpreted in the same format as the file stored in \.git/config\. It allows a subset of keys to be used, including and limited to: +.IP "\[ci]" 4 +lfs\.allowincompletepush +.IP "\[ci]" 4 +lfs\.fetchexclude +.IP "\[ci]" 4 +lfs\.fetchinclude +.IP "\[ci]" 4 +lfs\.gitprotocol +.IP "\[ci]" 4 +lfs\.locksverify +.IP "\[ci]" 4 +lfs\.pushurl +.IP "\[ci]" 4 +lfs\.skipdownloaderrors +.IP "\[ci]" 4 +lfs\.url +.IP "\[ci]" 4 +lfs\.{*}\.access +.IP "\[ci]" 4 +remote\.{name}\.lfsurl +.IP "" 0 +.P +The set of keys allowed in this file is restricted for security reasons\. +.SH "EXAMPLES" +.TP +Configure a custom LFS endpoint for your repository: + +.P +\fBgit config \-f \.lfsconfig lfs\.url https://lfs\.example\.com/foo/bar/info/lfs\fR +.SH "SEE ALSO" +git\-config(1), git\-lfs\-install(1), gitattributes(5), gitignore(5)\. +.P +Part of the git\-lfs(1) suite\. diff --git a/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz b/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..7beccddf270c0ce5f1d14badf30521455823a423 --- /dev/null +++ b/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6730b8036d9d99f872752489a331995930fec17b61c87c7af1945c65a482a50 +size 4449523 diff --git a/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz.1 b/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz.1 new file mode 100644 index 0000000000000000000000000000000000000000..7beccddf270c0ce5f1d14badf30521455823a423 --- /dev/null +++ b/out/merged/git-lfs-linux-amd64-v3.2.0.tar.gz.1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6730b8036d9d99f872752489a331995930fec17b61c87c7af1945c65a482a50 +size 4449523 diff --git a/out/merged/pytorch_model-00001-of-00003.bin b/out/merged/pytorch_model-00001-of-00003.bin new file mode 100644 index 0000000000000000000000000000000000000000..c14e512e4dab024dd0d5774d35ee6dc9ca9c803b --- /dev/null +++ b/out/merged/pytorch_model-00001-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:245d2f6a87f1972ac37185d7dcc42f39bf4b629d6399b80e7c9383fe267a5a22 +size 4943185632 diff --git a/out/merged/pytorch_model-00002-of-00003.bin b/out/merged/pytorch_model-00002-of-00003.bin new file mode 100644 index 0000000000000000000000000000000000000000..9300972c4f67726d274ccfaf67eca9d414238b3b --- /dev/null +++ b/out/merged/pytorch_model-00002-of-00003.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cad9c3fe2ff13540f9b943c6f076bf9f96af5feeca95c6626891f5db362c559 +size 4999844744 diff --git a/out/merged/pytorch_model-00003-of-00003.bin b/out/merged/pytorch_model-00003-of-00003.bin new file mode 100644 index 0000000000000000000000000000000000000000..3af6e82e9902143f48ae376da7088ccf5456fd03 --- /dev/null +++ b/out/merged/pytorch_model-00003-of-00003.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9a044d16be1c41bbac7b0a6a7d6b259c45e81ee2d678bc517defc1cc6715c233 +size 4540537414 diff --git a/out/merged/pytorch_model.bin.index.json b/out/merged/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..76fccc201c63903bcc555d59944b099e5cc7d336 --- /dev/null +++ b/out/merged/pytorch_model.bin.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 14483464192 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00003-of-00003.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.input_layernorm.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.v_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.q_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.o_proj.weight": 
"pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.k_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", 
+ "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.norm.weight": "pytorch_model-00003-of-00003.bin" + } +} diff --git a/out/merged/special_tokens_map.json b/out/merged/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/merged/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/merged/tokenizer.model b/out/merged/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/merged/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/merged/tokenizer_config.json b/out/merged/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/merged/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/special_tokens_map.json b/out/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/out/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/out/tokenizer.model b/out/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/out/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/out/tokenizer_config.json b/out/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a583d0f8c569fedb4f95590b442b2ac9f5fbd0bf --- /dev/null +++ b/out/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + 
"chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +} diff --git a/out/training_args.bin b/out/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1bf29f9aeb8d54142b45324e1e89c4134fa2826b --- /dev/null +++ b/out/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4783b55cb3ce48e21fc6968b7681826272dbf548a28fbbd59dcf516adf35655f +size 5688 diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000000000000000000000000000000000000..4b5df167b61d26ce682651410411614ca84a7d92 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +pre-commit +black +mypy +types-requests diff --git a/requirements-tests.txt b/requirements-tests.txt new file mode 100644 index 0000000000000000000000000000000000000000..e079f8a6038dd2dc8512967540f96ee0de172067 --- /dev/null +++ b/requirements-tests.txt @@ -0,0 +1 @@ +pytest diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..75ce7a0d8a107bdfb5c97df5f41f52860227b8af --- /dev/null +++ b/requirements.txt @@ -0,0 +1,42 @@ +--extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/ +packaging==23.2 +peft==0.9.0 +transformers @ git+https://github.com/huggingface/transformers.git@73a73b415e36f41481369f6129cb4b62bb127a78 +tokenizers==0.15.0 +bitsandbytes==0.43.0 +accelerate==0.28.0 +deepspeed==0.13.1 +pydantic==2.6.3 +addict +fire +PyYAML>=6.0 +requests +datasets>=2.15.0 +flash-attn==2.5.5 +sentencepiece +wandb +einops +xformers==0.0.22 +optimum==1.16.2 +hf_transfer +colorama +numba +numpy>=1.24.4 +# qlora things +evaluate==0.4.1 +scipy +scikit-learn==1.2.2 +pynvml +art +fschat==0.2.36 +gradio==3.50.2 +tensorboard + +mamba-ssm==1.1.1 + +# remote filesystems +s3fs +gcsfs +# adlfs + +trl @ git+https://github.com/huggingface/trl.git@304e208f778a5442c30cdda500348226cdc97d90 diff --git a/scripts/cloud-entrypoint.sh b/scripts/cloud-entrypoint.sh new file mode 100644 index 0000000000000000000000000000000000000000..c7b9ca3e0f12bc65382edfcf21a3305e748e83d2 --- /dev/null +++ b/scripts/cloud-entrypoint.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Export specific ENV variables to /etc/rp_environment +echo "Exporting environment variables..." 
+printenv | grep -E '^RUNPOD_|^PATH=|^_=' | sed 's/^\(.*\)=\(.*\)$/export \1="\2"/' >> /etc/rp_environment +echo 'source /etc/rp_environment' >> ~/.bashrc + +if [[ $PUBLIC_KEY ]]; then + # runpod + mkdir -p ~/.ssh + chmod 700 ~/.ssh + echo $PUBLIC_KEY >> ~/.ssh/authorized_keys + chmod 700 -R ~/.ssh + # Start the SSH service in the background + service ssh start +elif [ -n "$SSH_KEY" ]; then + # latitude.sh + mkdir -p ~/.ssh + chmod 700 ~/.ssh + echo $SSH_KEY >> ~/.ssh/authorized_keys + chmod 700 -R ~/.ssh + # Start the SSH service in the background + service ssh start +else + echo "No PUBLIC_KEY or SSH_KEY environment variable provided, not starting openSSH daemon" +fi + +# Check if JUPYTER_PASSWORD is set and not empty +if [ -n "$JUPYTER_PASSWORD" ]; then + # Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD + export JUPYTER_TOKEN="$JUPYTER_PASSWORD" +fi + +if [ "$JUPYTER_DISABLE" != "1" ]; then + # Run Jupyter Lab in the background + jupyter lab --port=8888 --ip=* --allow-root --ServerApp.allow_origin=* --ServerApp.preferred_dir=/workspace & +fi + +# Execute the passed arguments (CMD) +exec "$@" diff --git a/scripts/finetune.py b/scripts/finetune.py new file mode 100644 index 0000000000000000000000000000000000000000..d5bbcaf8f019b3b86733a8d70cdd6cff24b5d3a7 --- /dev/null +++ b/scripts/finetune.py @@ -0,0 +1,52 @@ +"""Prepare and train a model on a dataset. Can also infer from a model or merge lora""" +import logging +from pathlib import Path + +import fire +import transformers + +from axolotl.cli import ( + check_accelerate_default_config, + check_user_token, + do_inference, + do_merge_lora, + load_cfg, + load_datasets, + print_axolotl_text_art, +) +from axolotl.cli.shard import shard +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train + +LOG = logging.getLogger("axolotl.scripts.finetune") + + +def do_cli(config: Path = Path("examples/"), **kwargs): + print_axolotl_text_art() + LOG.warning( + str( + PendingDeprecationWarning( + "scripts/finetune.py will be replaced with calling axolotl.cli.train" + ) + ) + ) + parsed_cfg = load_cfg(config, **kwargs) + check_accelerate_default_config() + check_user_token() + parser = transformers.HfArgumentParser((TrainerCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + if parsed_cli_args.inference: + do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args) + elif parsed_cli_args.merge_lora: + do_merge_lora(cfg=parsed_cfg, cli_args=parsed_cli_args) + elif parsed_cli_args.shard: + shard(cfg=parsed_cfg, cli_args=parsed_cli_args) + else: + dataset_meta = load_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args) + train(cfg=parsed_cfg, cli_args=parsed_cli_args, dataset_meta=dataset_meta) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/scripts/motd b/scripts/motd new file mode 100644 index 0000000000000000000000000000000000000000..060a5c5c6c7e6452095cd0dd52c25631bc23d083 --- /dev/null +++ b/scripts/motd @@ -0,0 +1,17 @@ + + dP dP dP + 88 88 88 + .d8888b. dP. .dP .d8888b. 88 .d8888b. d8888P 88 + 88' `88 `8bd8' 88' `88 88 88' `88 88 88 + 88. .88 .d88b. 88. .88 88 88. .88 88 88 + `88888P8 dP' `dP `88888P' dP `88888P' dP dP + +Welcome to the axolotl cloud image! If the you've mounted a disk to /workspace and the axolotl directory ie empty, run the following commands: + +``` +cd /workspace +rm -rf /workspace/axolotl +git clone https://github.com/OpenAccess-AI-Collective/axolotl.git +cd axolotl +pip install --no-deps -e . 
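+# --no-deps assumes the prebuilt image already ships the pinned requirements;
+# optional extras (flash-attn, deepspeed, ...) are declared in setup.py.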
+``` diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..307691bd492180b262afec4b67d212c73aec4596 --- /dev/null +++ b/setup.py @@ -0,0 +1,96 @@ +"""setup.py for axolotl""" + +import platform +import re +from importlib.metadata import PackageNotFoundError, version + +from setuptools import find_packages, setup + + +def parse_requirements(): + _install_requires = [] + _dependency_links = [] + with open("./requirements.txt", encoding="utf-8") as requirements_file: + lines = [r.strip() for r in requirements_file.readlines()] + for line in lines: + is_extras = ( + "flash-attn" in line + or "flash-attention" in line + or "deepspeed" in line + or "mamba-ssm" in line + or "lion-pytorch" in line + ) + if line.startswith("--extra-index-url"): + # Handle custom index URLs + _, url = line.split() + _dependency_links.append(url) + elif not is_extras and line and line[0] != "#": + # Handle standard packages + _install_requires.append(line) + + try: + if "Darwin" in platform.system(): + _install_requires.pop(_install_requires.index("xformers==0.0.22")) + else: + torch_version = version("torch") + _install_requires.append(f"torch=={torch_version}") + + version_match = re.match(r"^(\d+)\.(\d+)(?:\.(\d+))?", torch_version) + if version_match: + major, minor, patch = version_match.groups() + major, minor = int(major), int(minor) + patch = ( + int(patch) if patch is not None else 0 + ) # Default patch to 0 if not present + else: + raise ValueError("Invalid version format") + + if (major, minor) >= (2, 1): + _install_requires.pop(_install_requires.index("xformers==0.0.22")) + _install_requires.append("xformers>=0.0.23") + except PackageNotFoundError: + pass + + return _install_requires, _dependency_links + + +install_requires, dependency_links = parse_requirements() + + +setup( + name="axolotl", + version="0.4.0", + description="LLM Trainer", + long_description="Axolotl is a tool designed to streamline the fine-tuning of various AI models, offering support for multiple configurations and architectures.", + package_dir={"": "src"}, + packages=find_packages(), + install_requires=install_requires, + dependency_links=dependency_links, + extras_require={ + "flash-attn": [ + "flash-attn==2.5.5", + ], + "fused-dense-lib": [ + "fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.3.3#subdirectory=csrc/fused_dense_lib", + ], + "deepspeed": [ + "deepspeed==0.13.1", + "deepspeed-kernels", + ], + "mamba-ssm": [ + "mamba-ssm==1.0.1", + ], + "auto-gptq": [ + "auto-gptq==0.5.1", + ], + "mlflow": [ + "mlflow", + ], + "lion-pytorch": [ + "lion-pytorch==0.1.2", + ], + "galore": [ + "galore_torch", + ], + }, +) diff --git a/src/axolotl/__init__.py b/src/axolotl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/cli/__init__.py b/src/axolotl/cli/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..79a9d319381390292e8152a2719a2c2bd3e4df0b --- /dev/null +++ b/src/axolotl/cli/__init__.py @@ -0,0 +1,452 @@ +"""Prepare and train a model on a dataset. 
Can also infer from a model or merge lora""" + +import importlib +import json +import logging +import math +import os +import random +import sys +import tempfile +from pathlib import Path +from threading import Thread +from typing import Any, Dict, List, Optional, Union +from urllib.parse import urlparse + +import requests +import torch +import yaml + +# add src to the pythonpath so we don't need to pip install this +from accelerate.commands.config import config_args +from art import text2art +from huggingface_hub import HfApi +from huggingface_hub.utils import LocalTokenNotFoundError +from transformers import GenerationConfig, TextIteratorStreamer, TextStreamer +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer +from axolotl.logging_config import configure_logging +from axolotl.train import TrainDatasetMeta +from axolotl.utils.config import ( + normalize_cfg_datasets, + normalize_config, + validate_config, +) +from axolotl.utils.data import load_prepare_dpo_datasets, prepare_dataset +from axolotl.utils.dict import DictDefault +from axolotl.utils.distributed import is_main_process +from axolotl.utils.mlflow_ import setup_mlflow_env_vars +from axolotl.utils.models import load_tokenizer +from axolotl.utils.tokenization import check_dataset_labels +from axolotl.utils.trainer import prepare_optim_env +from axolotl.utils.wandb_ import setup_wandb_env_vars + +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +src_dir = os.path.join(project_root, "src") +sys.path.insert(0, src_dir) + +configure_logging() +LOG = logging.getLogger("axolotl.scripts") + +os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1" + + +def print_axolotl_text_art(suffix=None): + font = "nancyj" + ascii_text = " axolotl" + if suffix: + ascii_text += f" x {suffix}" + ascii_art = text2art(ascii_text, font=font) + + if is_main_process(): + print(ascii_art) + + +def check_remote_config(config: Union[str, Path]): + # Check if the config is a valid HTTPS URL to a .yml or .yaml file + if not (isinstance(config, str) and config.startswith("https://")): + return config # Return the original value if it's not a valid URL + + filename = os.path.basename(urlparse(config).path) + temp_dir = tempfile.mkdtemp() + + try: + response = requests.get(config, timeout=30) + response.raise_for_status() # Check for HTTP errors + + content = response.content + try: + # Try parsing as JSON first to catch cases where JSON content is mistakenly considered YAML + json.loads(content) + # Log a warning but do not raise an error; JSON is technically valid YAML - this can happen when you forget to point to a raw github link + LOG.warning( + f"Warning: The content of the file at {config} is JSON, which is technically valid YAML but might not be intended." 
+ ) + except json.JSONDecodeError: + # If it's not valid JSON, verify it's valid YAML + try: + yaml.safe_load(content) + except yaml.YAMLError as err: + raise ValueError( + f"Failed to parse the content at {config} as YAML: {err}" + ) from err + + # Write the content to a file if it's valid YAML (or JSON treated as YAML) + output_path = Path(temp_dir) / filename + with open(output_path, "wb") as file: + file.write(content) + LOG.info( + f"Using the following config obtained from {config}:\n\n{content.decode('utf-8')}\n" + ) + return output_path + + except requests.RequestException as err: + # This catches all requests-related exceptions including HTTPError + raise RuntimeError(f"Failed to download {config}: {err}") from err + except Exception as err: + # Catch-all for any other exceptions + raise err + + +def get_multi_line_input() -> Optional[str]: + print("Give me an instruction (Ctrl + D to submit): ") + instruction = "" + for line in sys.stdin: + instruction += line # pylint: disable=consider-using-join + # instruction = pathlib.Path("/proc/self/fd/0").read_text() + return instruction + + +def do_merge_lora( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +): + model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args) + safe_serialization = cfg.save_safetensors is True + + LOG.info("running merge of LoRA with base model") + model = model.merge_and_unload(progressbar=True) + try: + model.to(dtype=cfg.torch_dtype) + except RuntimeError: + pass + model.generation_config.do_sample = True + + if cfg.local_rank == 0: + LOG.info(f"saving merged model to: {str(Path(cfg.output_dir) / 'merged')}") + model.save_pretrained( + str(Path(cfg.output_dir) / "merged"), + safe_serialization=safe_serialization, + progressbar=True, + ) + tokenizer.save_pretrained(str(Path(cfg.output_dir) / "merged")) + + +def do_inference( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +): + model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args) + prompter = cli_args.prompter + default_tokens = {"unk_token": "", "bos_token": "", "eos_token": ""} + + for token, symbol in default_tokens.items(): + # If the token isn't already specified in the config, add it + if not (cfg.special_tokens and token in cfg.special_tokens): + tokenizer.add_special_tokens({token: symbol}) + + prompter_module = None + if prompter: + prompter_module = getattr( + importlib.import_module("axolotl.prompters"), prompter + ) + + model = model.to(cfg.device, dtype=cfg.torch_dtype) + + while True: + print("=" * 80) + # support for multiline inputs + instruction = get_multi_line_input() + if not instruction: + return + if prompter_module: + prompt: str = next( + prompter_module().build_prompt(instruction=instruction.strip("\n")) + ) + else: + prompt = instruction.strip() + batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=True) + + print("=" * 40) + model.eval() + with torch.no_grad(): + generation_config = GenerationConfig( + repetition_penalty=1.1, + max_new_tokens=1024, + temperature=0.9, + top_p=0.95, + top_k=40, + bos_token_id=tokenizer.bos_token_id, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.pad_token_id, + do_sample=True, + use_cache=True, + return_dict_in_generate=True, + output_attentions=False, + output_hidden_states=False, + output_scores=False, + ) + streamer = TextStreamer(tokenizer) + generated = model.generate( + inputs=batch["input_ids"].to(cfg.device), + generation_config=generation_config, + streamer=streamer, + ) + print("=" * 40) + 
print(tokenizer.decode(generated["sequences"].cpu().tolist()[0])) + + +def do_inference_gradio( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +): + import gradio as gr + + model, tokenizer = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args) + prompter = cli_args.prompter + default_tokens = {"unk_token": "", "bos_token": "", "eos_token": ""} + + for token, symbol in default_tokens.items(): + # If the token isn't already specified in the config, add it + if not (cfg.special_tokens and token in cfg.special_tokens): + tokenizer.add_special_tokens({token: symbol}) + + prompter_module = None + if prompter: + prompter_module = getattr( + importlib.import_module("axolotl.prompters"), prompter + ) + + model = model.to(cfg.device, dtype=cfg.torch_dtype) + + def generate(instruction): + if not instruction: + return + if prompter_module: + # pylint: disable=stop-iteration-return + prompt: str = next( + prompter_module().build_prompt(instruction=instruction.strip("\n")) + ) + else: + prompt = instruction.strip() + batch = tokenizer(prompt, return_tensors="pt", add_special_tokens=True) + + model.eval() + with torch.no_grad(): + generation_config = GenerationConfig( + repetition_penalty=1.1, + max_new_tokens=1024, + temperature=0.9, + top_p=0.95, + top_k=40, + bos_token_id=tokenizer.bos_token_id, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.pad_token_id, + do_sample=True, + use_cache=True, + return_dict_in_generate=True, + output_attentions=False, + output_hidden_states=False, + output_scores=False, + ) + streamer = TextIteratorStreamer(tokenizer) + generation_kwargs = { + "inputs": batch["input_ids"].to(cfg.device), + "generation_config": generation_config, + "streamer": streamer, + } + + thread = Thread(target=model.generate, kwargs=generation_kwargs) + thread.start() + + all_text = "" + + for new_text in streamer: + all_text += new_text + yield all_text + + demo = gr.Interface( + fn=generate, + inputs="textbox", + outputs="text", + title=cfg.get("gradio_title", "Axolotl Gradio Interface"), + ) + demo.queue().launch(show_api=False, share=True) + + +def choose_config(path: Path): + yaml_files = list(path.glob("*.yml")) + + if not yaml_files: + raise ValueError( + "No YAML config files found in the specified directory. Are you using a .yml extension?" + ) + + if len(yaml_files) == 1: + print(f"Using default YAML file '{yaml_files[0]}'") + return yaml_files[0] + + print("Choose a YAML file:") + for idx, file in enumerate(yaml_files): + print(f"{idx + 1}. {file}") + + chosen_file = None + while chosen_file is None: + try: + choice = int(input("Enter the number of your choice: ")) + if 1 <= choice <= len(yaml_files): + chosen_file = yaml_files[choice - 1] + else: + print("Invalid choice. Please choose a number from the list.") + except ValueError: + print("Invalid input. 
Please enter a number.") + + return chosen_file + + +def check_not_in(list1: List[str], list2: Union[Dict[str, Any], List[str]]) -> bool: + return not any(el in list2 for el in list1) + + +def load_cfg(config: Union[str, Path] = Path("examples/"), **kwargs): + config = check_remote_config(config) + if Path(config).is_dir(): + config = choose_config(Path(config)) + + # load the config from the yaml file + with open(config, encoding="utf-8") as file: + cfg: DictDefault = DictDefault(yaml.safe_load(file)) + # if there are any options passed in the cli, if it is something that seems valid from the yaml, + # then overwrite the value + cfg_keys = cfg.keys() + for k, _ in kwargs.items(): + # if not strict, allow writing to cfg even if it's not in the yml already + if k in cfg_keys or not cfg.strict: + # handle booleans + if isinstance(cfg[k], bool): + cfg[k] = bool(kwargs[k]) + else: + cfg[k] = kwargs[k] + + cfg.axolotl_config_path = config + + try: + device_props = torch.cuda.get_device_properties("cuda") + gpu_version = "sm_" + str(device_props.major) + str(device_props.minor) + except: # pylint: disable=bare-except # noqa: E722 + gpu_version = None + + cfg = validate_config( + cfg, + capabilities={ + "bf16": is_torch_bf16_gpu_available(), + "n_gpu": os.environ.get("WORLD_SIZE", 1), + "compute_capability": gpu_version, + }, + ) + + prepare_optim_env(cfg) + + normalize_config(cfg) + + normalize_cfg_datasets(cfg) + + setup_wandb_env_vars(cfg) + + setup_mlflow_env_vars(cfg) + + return cfg + + +def load_datasets( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +) -> TrainDatasetMeta: + tokenizer = load_tokenizer(cfg) + + train_dataset, eval_dataset, total_num_steps, prompters = prepare_dataset( + cfg, tokenizer + ) + + if cli_args.debug or cfg.debug: + LOG.info("check_dataset_labels...") + check_dataset_labels( + train_dataset.select( + [ + random.randrange(0, len(train_dataset) - 1) # nosec + for _ in range(cli_args.debug_num_examples) + ] + ), + tokenizer, + num_examples=cli_args.debug_num_examples, + text_only=cli_args.debug_text_only, + ) + + LOG.info("printing prompters...") + for prompter in prompters: + LOG.info(prompter) + + return TrainDatasetMeta( + train_dataset=train_dataset, + eval_dataset=eval_dataset, + total_num_steps=total_num_steps, + ) + + +def load_rl_datasets( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, # pylint: disable=unused-argument +) -> TrainDatasetMeta: + train_dataset, eval_dataset = load_prepare_dpo_datasets(cfg) + total_num_steps = int( + math.ceil(len(train_dataset) * cfg.num_epochs / cfg.batch_size) + ) + + return TrainDatasetMeta( + train_dataset=train_dataset, + eval_dataset=eval_dataset, + total_num_steps=total_num_steps, + ) + + +def check_accelerate_default_config(): + if Path(config_args.default_yaml_config_file).exists(): + LOG.warning( + f"accelerate config file found at {config_args.default_yaml_config_file}. This can lead to unexpected errors" + ) + + +def check_user_token(): + # Skip check if HF_HUB_OFFLINE is set to True + if os.getenv("HF_HUB_OFFLINE") == "1": + LOG.info( + "Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used." + ) + return True + + # Verify if token is valid + api = HfApi() + try: + user_info = api.whoami() + return bool(user_info) + except LocalTokenNotFoundError: + LOG.warning( + "Error verifying HuggingFace token. 
Remember to log in using `huggingface-cli login` and get your access token from https://huggingface.co/settings/tokens if you want to use gated models or datasets." + ) + return False diff --git a/src/axolotl/cli/inference.py b/src/axolotl/cli/inference.py new file mode 100644 index 0000000000000000000000000000000000000000..86ad8409ff386ef857214207f460c22843af6be3 --- /dev/null +++ b/src/axolotl/cli/inference.py @@ -0,0 +1,36 @@ +""" +CLI to run inference on a trained model +""" +from pathlib import Path + +import fire +import transformers + +from axolotl.cli import ( + do_inference, + do_inference_gradio, + load_cfg, + print_axolotl_text_art, +) +from axolotl.common.cli import TrainerCliArgs + + +def do_cli(config: Path = Path("examples/"), gradio=False, **kwargs): + # pylint: disable=duplicate-code + print_axolotl_text_art() + parsed_cfg = load_cfg(config, **kwargs) + parsed_cfg.sample_packing = False + parser = transformers.HfArgumentParser((TrainerCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + parsed_cli_args.inference = True + + if gradio: + do_inference_gradio(cfg=parsed_cfg, cli_args=parsed_cli_args) + else: + do_inference(cfg=parsed_cfg, cli_args=parsed_cli_args) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/src/axolotl/cli/merge_lora.py b/src/axolotl/cli/merge_lora.py new file mode 100644 index 0000000000000000000000000000000000000000..400201fc97eeed5f4b96f2371b2a735c6c9d5129 --- /dev/null +++ b/src/axolotl/cli/merge_lora.py @@ -0,0 +1,46 @@ +""" +CLI to run merge a trained LoRA into a base model +""" +from pathlib import Path + +import fire +import transformers + +from axolotl.cli import do_merge_lora, load_cfg, print_axolotl_text_art +from axolotl.common.cli import TrainerCliArgs + + +def do_cli(config: Path = Path("examples/"), **kwargs): + # pylint: disable=duplicate-code + print_axolotl_text_art() + parser = transformers.HfArgumentParser((TrainerCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + parsed_cli_args.merge_lora = True + + parsed_cfg = load_cfg( + config, + merge_lora=True, + load_in_8bit=False, + load_in_4bit=False, + flash_attention=False, + **kwargs, + ) + + if not parsed_cfg.lora_model_dir and parsed_cfg.output_dir: + parsed_cfg.lora_model_dir = parsed_cfg.output_dir + if not Path(parsed_cfg.lora_model_dir).exists(): + raise ValueError( + f"Target directory for merge: `{parsed_cfg.lora_model_dir}` does not exist." 
+ ) + + parsed_cfg.load_in_4bit = False + parsed_cfg.load_in_8bit = False + parsed_cfg.flash_attention = False + + do_merge_lora(cfg=parsed_cfg, cli_args=parsed_cli_args) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/src/axolotl/cli/preprocess.py b/src/axolotl/cli/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..a1a01d59de8b6530d899282a81059e5bb6fb2367 --- /dev/null +++ b/src/axolotl/cli/preprocess.py @@ -0,0 +1,70 @@ +""" +CLI to run training on a model +""" +import logging +from pathlib import Path +from typing import Union + +import fire +import transformers +from colorama import Fore + +from axolotl.cli import ( + check_accelerate_default_config, + check_user_token, + load_cfg, + load_datasets, + load_rl_datasets, + print_axolotl_text_art, +) +from axolotl.common.cli import PreprocessCliArgs +from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH +from axolotl.prompt_strategies.sharegpt import register_chatml_template + +LOG = logging.getLogger("axolotl.cli.preprocess") + + +def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs): + # pylint: disable=duplicate-code + print_axolotl_text_art() + parsed_cfg = load_cfg(config, **kwargs) + parsed_cfg.is_preprocess = True + check_accelerate_default_config() + check_user_token() + parser = transformers.HfArgumentParser((PreprocessCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + + if parsed_cfg.chat_template == "chatml" and parsed_cfg.default_system_message: + LOG.info( + f"ChatML set. Adding default system message: {parsed_cfg.default_system_message}" + ) + register_chatml_template(parsed_cfg.default_system_message) + else: + register_chatml_template() + + if not parsed_cfg.dataset_prepared_path: + msg = ( + Fore.RED + + "preprocess CLI called without dataset_prepared_path set, " + + f"using default path: {DEFAULT_DATASET_PREPARED_PATH}" + + Fore.RESET + ) + LOG.warning(msg) + parsed_cfg.dataset_prepared_path = DEFAULT_DATASET_PREPARED_PATH + + if parsed_cfg.rl and parsed_cfg.rl != "orpo": + load_rl_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args) + else: + load_datasets(cfg=parsed_cfg, cli_args=parsed_cli_args) + + LOG.info( + Fore.GREEN + + f"Success! 
Preprocessed data path: `dataset_prepared_path: {parsed_cfg.dataset_prepared_path}`" + + Fore.RESET + ) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/src/axolotl/cli/shard.py b/src/axolotl/cli/shard.py new file mode 100644 index 0000000000000000000000000000000000000000..48f22790ac50d99fd0630b9c78d832430b198985 --- /dev/null +++ b/src/axolotl/cli/shard.py @@ -0,0 +1,43 @@ +""" +CLI to shard a trained model into 10GiB chunks +""" +import logging +from pathlib import Path +from typing import Union + +import fire +import transformers + +from axolotl.cli import load_cfg, print_axolotl_text_art +from axolotl.common.cli import TrainerCliArgs, load_model_and_tokenizer +from axolotl.utils.dict import DictDefault + +LOG = logging.getLogger("axolotl.scripts") + + +def shard( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +): + model, _ = load_model_and_tokenizer(cfg=cfg, cli_args=cli_args) + safe_serialization = cfg.save_safetensors is True + LOG.debug("Re-saving model w/ sharding") + model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization) + + +def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs): + # pylint: disable=duplicate-code + print_axolotl_text_art() + parsed_cfg = load_cfg(config, **kwargs) + parser = transformers.HfArgumentParser((TrainerCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + parsed_cli_args.shard = True + + shard(cfg=parsed_cfg, cli_args=parsed_cli_args) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/src/axolotl/cli/train.py b/src/axolotl/cli/train.py new file mode 100644 index 0000000000000000000000000000000000000000..7e004567a6872758ef1fe56ad2e7dcba7937618b --- /dev/null +++ b/src/axolotl/cli/train.py @@ -0,0 +1,59 @@ +""" +CLI to run training on a model +""" +import logging +from pathlib import Path +from typing import Tuple, Union + +import fire +from transformers.hf_argparser import HfArgumentParser +from transformers.modeling_utils import PreTrainedModel +from transformers.tokenization_utils import PreTrainedTokenizer + +from axolotl.cli import ( + check_accelerate_default_config, + check_user_token, + load_cfg, + load_datasets, + load_rl_datasets, + print_axolotl_text_art, +) +from axolotl.common.cli import TrainerCliArgs +from axolotl.prompt_strategies.sharegpt import register_chatml_template +from axolotl.train import train + +LOG = logging.getLogger("axolotl.cli.train") + + +def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs): + # pylint: disable=duplicate-code + parsed_cfg = load_cfg(config, **kwargs) + parser = HfArgumentParser((TrainerCliArgs)) + parsed_cli_args, _ = parser.parse_args_into_dataclasses( + return_remaining_strings=True + ) + return do_train(parsed_cfg, parsed_cli_args) + + +def do_train(cfg, cli_args) -> Tuple[PreTrainedModel, PreTrainedTokenizer]: + print_axolotl_text_art() + check_accelerate_default_config() + check_user_token() + if cfg.chat_template == "chatml" and cfg.default_system_message: + LOG.info( + f"ChatML set. 
Adding default system message: {cfg.default_system_message}" + ) + register_chatml_template(cfg.default_system_message) + else: + register_chatml_template() + + if cfg.rl and cfg.rl != "orpo": + dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args) + else: + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + return train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + + +if __name__ == "__main__": + fire.Fire(do_cli) diff --git a/src/axolotl/common/__init__.py b/src/axolotl/common/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/common/cli.py b/src/axolotl/common/cli.py new file mode 100644 index 0000000000000000000000000000000000000000..636a23ba522f79f739927d0cef91f4843dacd526 --- /dev/null +++ b/src/axolotl/common/cli.py @@ -0,0 +1,55 @@ +""" +shared module for cli specific things +""" + +import logging +from dataclasses import dataclass, field +from typing import Optional + +import axolotl.monkeypatch.data.batch_dataset_fetcher # pylint: disable=unused-import # noqa: F401 +from axolotl.logging_config import configure_logging +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import load_model, load_tokenizer + +configure_logging() +LOG = logging.getLogger("axolotl.common.cli") + + +@dataclass +class TrainerCliArgs: + """ + dataclass representing the various non-training arguments + """ + + debug: bool = field(default=False) + debug_text_only: bool = field(default=False) + debug_num_examples: int = field(default=5) + inference: bool = field(default=False) + merge_lora: bool = field(default=False) + prompter: Optional[str] = field(default=None) + shard: bool = field(default=False) + + +@dataclass +class PreprocessCliArgs: + """ + dataclass representing arguments for preprocessing only + """ + + debug: bool = field(default=False) + debug_text_only: bool = field(default=False) + debug_num_examples: int = field(default=1) + prompter: Optional[str] = field(default=None) + + +def load_model_and_tokenizer( + *, + cfg: DictDefault, + cli_args: TrainerCliArgs, +): + LOG.info(f"loading tokenizer... 
{cfg.tokenizer_config or cfg.base_model_config}") + tokenizer = load_tokenizer(cfg) + LOG.info("loading model and (optionally) peft_config...") + model, _ = load_model(cfg, tokenizer, inference=cli_args.inference) + + return model, tokenizer diff --git a/src/axolotl/common/const.py b/src/axolotl/common/const.py new file mode 100644 index 0000000000000000000000000000000000000000..fd34ad4694f082fd8a1a0f135fba162e977b868c --- /dev/null +++ b/src/axolotl/common/const.py @@ -0,0 +1,5 @@ +""" +Various shared constants +""" + +DEFAULT_DATASET_PREPARED_PATH = "last_run_prepared" diff --git a/src/axolotl/convert.py b/src/axolotl/convert.py new file mode 100644 index 0000000000000000000000000000000000000000..357e0ec50e1a055f28f1a26e39ca3f2c961a6954 --- /dev/null +++ b/src/axolotl/convert.py @@ -0,0 +1,78 @@ +"""Module containing File Reader, File Writer, Json Parser, and Jsonl Serializer classes""" + + +import json +import sys + + +class FileReader: + """ + Reads a file and returns its contents as a string + """ + + def read(self, file_path): + with open(file_path, encoding="utf-8") as file: + return file.read() + + +class FileWriter: + """ + Writes a string to a file + """ + + def __init__(self, file_path): + self.file_path = file_path + + def write(self, content): + with open(self.file_path, "w", encoding="utf-8") as file: + file.write(content) + + +class StdoutWriter: + """ + Writes a string to stdout + """ + + def write(self, content): + sys.stdout.write(content) + sys.stdout.write("\n") + + +class JsonParser: + """ + Parses a string as JSON and returns the result + """ + + def parse(self, content): + return json.loads(content) + + +class JsonlSerializer: + """ + Serializes a list of JSON objects into a JSONL string + """ + + def serialize(self, data): + lines = [json.dumps(item) for item in data] + return "\n".join(lines) + + +class JsonToJsonlConverter: + """ + Converts a JSON file to JSONL + """ + + def __init__(self, file_reader, file_writer, json_parser, jsonl_serializer): + self.file_reader = file_reader + self.file_writer = file_writer + self.json_parser = json_parser + self.jsonl_serializer = jsonl_serializer + + def convert( + self, input_file_path, output_file_path + ): # pylint: disable=unused-argument + content = self.file_reader.read(input_file_path) + data = self.json_parser.parse(content) + # data = [r for r in data if r["conversations"]] # vicuna cleaned has rows with empty conversations + jsonl_content = self.jsonl_serializer.serialize(data) + self.file_writer.write(jsonl_content) diff --git a/src/axolotl/core/__init__.py b/src/axolotl/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..4d85b40dee8a4c557b981b0b7ed4f61a973cc8a8 --- /dev/null +++ b/src/axolotl/core/trainer_builder.py @@ -0,0 +1,1533 @@ +# pylint: disable=too-many-lines +""" +Builder for the training args and trainer +""" + +import abc +import importlib +import importlib.util +import logging +import math +import sys +from abc import abstractmethod +from collections import defaultdict +from dataclasses import dataclass, field +from functools import wraps +from pathlib import Path +from typing import Dict, List, Literal, Optional, Type, Union + +import torch +import transformers +from datasets import Dataset +from torch.optim.lr_scheduler import OneCycleLR +from torch.utils.data import 
BatchSampler, DataLoader, RandomSampler, SequentialSampler +from transformers import ( + EarlyStoppingCallback, + Trainer, + TrainerCallback, + TrainingArguments, +) +from transformers.trainer_utils import seed_worker +from transformers.utils import is_sagemaker_mp_enabled +from trl import DPOTrainer +from trl.trainer.utils import pad_to_length + +from axolotl.loraplus import create_loraplus_optimizer +from axolotl.monkeypatch.multipack import SUPPORTED_MULTIPACK_MODEL_TYPES +from axolotl.monkeypatch.relora import ReLoRACallback, ReLoRAScheduler +from axolotl.utils.callbacks import ( + EvalFirstStepCallback, + GPUStatsCallback, + LossWatchDogCallback, + SaveAxolotlConfigtoWandBCallback, + SaveBetterTransformerModelCallback, + bench_eval_callback_factory, + causal_lm_bench_eval_callback_factory, + log_prediction_callback_factory, +) +from axolotl.utils.collators import ( + BatchSamplerDataCollatorForSeq2Seq, + DataCollatorForSeq2Seq, + MambaDataCollator, + V2BatchSamplerDataCollatorForSeq2Seq, +) +from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths +from axolotl.utils.schedulers import ( + get_cosine_schedule_with_min_lr, + get_cosine_schedule_with_quadratic_warmup, + get_cosine_schedule_with_warmup_decay_constant, +) + +if is_sagemaker_mp_enabled(): + import smdistributed.modelparallel.torch as smp + +try: + import torch._dynamo # pylint: disable=ungrouped-imports +except ImportError: + pass + +LOG = logging.getLogger("axolotl.core.trainer_builder") + + +def is_mlflow_available(): + return importlib.util.find_spec("mlflow") is not None + + +def _sanitize_kwargs_for_tagging(tag_names, kwargs=None): + if isinstance(tag_names, str): + tag_names = [tag_names] + + if kwargs is not None: + if "tags" not in kwargs: + kwargs["tags"] = tag_names + elif "tags" in kwargs and isinstance(kwargs["tags"], list): + kwargs["tags"].extend(tag_names) + elif "tags" in kwargs and isinstance(kwargs["tags"], str): + tag_names.append(kwargs["tags"]) + kwargs["tags"] = tag_names + + return kwargs + + +@dataclass +class AxolotlTrainingArguments(TrainingArguments): + """ + Extend the base TrainingArguments for axolotl helpers + """ + + model_type: Optional[str] = field( + default=None, metadata={"help": "HF model configuration model_type."} + ) + lr_quadratic_warmup: bool = field( + default=False, + metadata={"help": "Use quadratic warmup for cosine scheduling."}, + ) + pretraining: bool = field( + default=False, + metadata={ + "help": "Indicates to trainer whether we are doing continued pretraining." 
+ }, + ) + sample_packing: bool = field( + default=False, + metadata={"help": "Use sample packing for efficient training."}, + ) + multipack_real_batches: bool = field( + default=False, + metadata={"help": "Use real batches for efficient training."}, + ) + eval_sample_packing: Optional[bool] = field( + default=None, + metadata={"help": "Use sample packing for efficient evals."}, + ) + sample_packing_efficiency: float = field( + default=1.0, + metadata={"help": "Sample packing efficiency for calculating batch length."}, + ) + max_seq_length: int = field( + default=2048, + metadata={"help": "The maximum sequence length the model can handle"}, + ) + sample_packing_seq_len_multiplier: int = field( + default=1, + metadata={"help": "the multiplier for the max len for packed sequences"}, + ) + relora_steps: Optional[int] = field( + default=None, + metadata={"help": "how often to reset for ReLoRA"}, + ) + relora_warmup_steps: Optional[int] = field( + default=None, + metadata={"help": "how many warmup steps to take after reset for ReLoRA"}, + ) + relora_anneal_steps: Optional[int] = field( + default=None, + metadata={"help": "how many warmup steps to take after reset for ReLoRA"}, + ) + relora_prune_ratio: Optional[float] = field( + default=0.9, + metadata={"help": "prune ratio for magnitude pruning of the optimizer"}, + ) + bench_split: Optional[str] = field( + default="eval", metadata={"help": "The benchmark split to run on"} + ) + bench_dataset: Optional[str] = field( + default="pharaouk/dharma-1/dharma_1_mini.json", + metadata={ + "help": "Benchmark dataset to use: options are `mmlu-zs`, `mmlu-fs`, or the full path to the dataset file" + }, + ) + do_bench_eval: Optional[bool] = field( + default=False, metadata={"help": "Whether to run the Benchmark evaluation."} + ) + do_causal_lm_eval: Optional[bool] = field( + default=False, metadata={"help": "Whether to run the Causal LM evaluation."} + ) + max_bench_samples: Optional[int] = field( + default=None, + metadata={ + "help": "If set, only evaluates on `max_bench_samples` of the benchmark dataset." 
+ }, + ) + bench_source_max_len: int = field( + default=2048, metadata={"help": "Maximum source sequence length for bench."} + ) + dataloader_prefetch_factor: Optional[int] = field( + default=None, + metadata={"help": "prefetch_factor argument to the dataloader"}, + ) + cosine_min_lr_ratio: Optional[float] = field( + default=None, + metadata={"help": "Minimum learning rate is min_lr_ratio * learning_rate"}, + ) + cosine_constant_lr_ratio: Optional[float] = field( + default=None, + metadata={ + "help": "Starting constant learning rate step is cosine_constant_lr_ratio * max_steps" + }, + ) + loraplus_lr_ratio: Optional[float] = field( + default=None, metadata={"help": "loraplus learning rate ratio lr_B / lr_A."} + ) + loraplus_lr_embedding: Optional[float] = field( + default=1e-6, + metadata={"help": "loraplus learning rate for lora embedding layers."}, + ) + qlora: bool = field( + default=False, + metadata={"help": "whether this is a qlora training"}, + ) + orpo_alpha: Optional[float] = field( + default=None, + ) + + +class AxolotlTrainer(Trainer): + """ + Extend the base Trainer for axolotl helpers + """ + + args = None # type: AxolotlTrainingArguments + tag_names = ["axolotl"] + + def __init__( + self, + *_args, + num_epochs=1, + bench_data_collator=None, + eval_data_collator=None, + **kwargs, + ): + self.num_epochs = num_epochs + self.bench_data_collator = bench_data_collator + self.eval_data_collator = eval_data_collator + super().__init__(*_args, **kwargs) + self.train_data_collator = self.data_collator + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + if self.args.orpo_alpha: + self.loss_fct = torch.nn.CrossEntropyLoss(reduction="none") + + def create_optimizer(self): + if self.args.loraplus_lr_ratio is None: + return super().create_optimizer() + + opt_model = self.model_wrapped if is_sagemaker_mp_enabled() else self.model + if self.optimizer is None: # pylint: disable=access-member-before-definition + optimizer_cls, optimizer_kwargs = Trainer.get_optimizer_cls_and_kwargs( + self.args, + opt_model, + ) + + loraplus_lr_ratio = getattr(self.args, "loraplus_lr_ratio", None) + loraplus_lr_embedding = getattr(self.args, "loraplus_lr_embedding", None) + self.optimizer = create_loraplus_optimizer( # pylint: disable=attribute-defined-outside-init + opt_model, + optimizer_cls, + optimizer_kwargs, + loraplus_lr_ratio, + loraplus_lr_embedding, + ) + + if is_sagemaker_mp_enabled(): + self.optimizer = smp.DistributedOptimizer( # pylint: disable=attribute-defined-outside-init + self.optimizer + ) + + return self.optimizer + + def create_scheduler( + self, num_training_steps: int, optimizer: torch.optim.Optimizer = None + ): + """ + Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or + passed as an argument. + + Args: + num_training_steps (int): The number of training steps to do. + optimizer (torch.optim.Optimizer): The training optimizer + """ + use_cosine_quadratic = ( + self.args.lr_scheduler_type == "cosine" + and self.args.lr_quadratic_warmup is True + ) + + use_cosine_min_lr = ( + self.args.lr_scheduler_type == "cosine" + and self.args.cosine_min_lr_ratio is not None + ) + + # fmt: off + if self.lr_scheduler is None: # type: ignore # pylint: disable=access-member-before-definition + # fmt: on + if use_cosine_quadratic: + if use_cosine_min_lr: + LOG.warning("Both cosine quadratic warmup and min lr detected. 
Using quadratic warmup.") + + self.lr_scheduler = get_cosine_schedule_with_quadratic_warmup( # pylint: disable=attribute-defined-outside-init + optimizer, + num_warmup_steps=self.args.get_warmup_steps(num_training_steps), + num_training_steps=num_training_steps, + ) + elif self.args.cosine_min_lr_ratio and self.args.cosine_constant_lr_ratio and use_cosine_min_lr: + assert 0 <= self.args.cosine_min_lr_ratio <= 1.0, "cosine_min_lr_ratio must be between 0.0 and 1.0" + assert 0 <= self.args.cosine_constant_lr_ratio <= 1.0, "cosine_constant_lr_ratio must be between 0.0 and 1.0" + self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant( # pylint: disable=attribute-defined-outside-init + optimizer, + num_warmup_steps=self.args.get_warmup_steps(num_training_steps), + num_training_steps=num_training_steps, + min_lr_ratio=self.args.cosine_min_lr_ratio, + constant_lr_ratio=self.args.cosine_constant_lr_ratio, + ) + elif self.args.cosine_min_lr_ratio and use_cosine_min_lr: + assert 0 <= self.args.cosine_min_lr_ratio <= 1.0, "cosine_min_lr_ratio must be between 0.0 and 1.0" + self.lr_scheduler = get_cosine_schedule_with_min_lr( # pylint: disable=attribute-defined-outside-init + optimizer, + num_warmup_steps=self.args.get_warmup_steps(num_training_steps), + num_training_steps=num_training_steps, + min_lr_ratio=self.args.cosine_min_lr_ratio, + ) + else: + return super().create_scheduler(num_training_steps, optimizer) + else: + if use_cosine_quadratic: + LOG.warning("axolotl's cosine scheduler with quadratic warmup not used (e.g., because of deepspeed).") + + if use_cosine_min_lr: + LOG.warning("axolotl's cosine scheduler with min lr not used (e.g., because of deepspeed).") + + return self.lr_scheduler + + def _get_train_sampler(self) -> Optional[torch.utils.data.Sampler]: + if self.args.sample_packing and not self.args.pretraining: + if self.args.multipack_real_batches: + batch_size = self.args.per_device_train_batch_size + batch_max_len = self.args.max_seq_length + else: + batch_size = 1 + batch_max_len = ( + self.args.per_device_train_batch_size * self.args.max_seq_length + ) + return MultipackBatchSampler( + RandomSampler(self.train_dataset), + batch_size=batch_size, + drop_last=True, + batch_max_len=batch_max_len, + lengths=get_dataset_lengths(self.train_dataset), + packing_efficiency_estimate=self.args.sample_packing_efficiency, + ) + return super()._get_train_sampler() + + def _get_eval_sampler( + self, eval_dataset: Dataset + ) -> Optional[torch.utils.data.Sampler]: + if self.args.sample_packing and self.args.eval_sample_packing is not False: + if self.args.multipack_real_batches: + batch_size = self.args.per_device_eval_batch_size + batch_max_len = self.args.max_seq_length + else: + batch_size = 1 + batch_max_len = ( + self.args.per_device_eval_batch_size * self.args.max_seq_length + ) + return MultipackBatchSampler( + SequentialSampler(eval_dataset), + batch_size=batch_size, + drop_last=True, + batch_max_len=batch_max_len, + lengths=get_dataset_lengths(eval_dataset), + packing_efficiency_estimate=self.args.sample_packing_efficiency, + ) + return super()._get_eval_sampler(eval_dataset) + + def get_train_dataloader(self) -> DataLoader: + if self.args.sample_packing and not self.args.pretraining: + train_dataset = self.train_dataset + if "length" in train_dataset.features.keys(): + train_dataset = train_dataset.remove_columns(["length"]) + data_collator = self.data_collator + dataloader_params = { + "batch_size": self._train_batch_size, + "collate_fn": data_collator, + "num_workers": 
self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + } + if self.args.dataloader_prefetch_factor: + dataloader_params[ + "prefetch_factor" + ] = self.args.dataloader_prefetch_factor + + sampler = self._get_train_sampler() + if isinstance(sampler, BatchSampler): + dataloader_params["batch_sampler"] = sampler + del dataloader_params["batch_size"] + else: + dataloader_params["sampler"] = sampler + dataloader_params["drop_last"] = self.args.dataloader_drop_last + dataloader_params["worker_init_fn"] = seed_worker + + self.accelerator.even_batches = False + return self.accelerator.prepare_data_loader( + DataLoader(train_dataset, **dataloader_params) + ) + return super().get_train_dataloader() + + def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoader: + if self.args.sample_packing and self.args.eval_sample_packing is False: + self.data_collator = ( # pylint: disable=attribute-defined-outside-init + self.eval_data_collator + ) + dataloader = super().get_eval_dataloader(eval_dataset) + self.data_collator = ( # pylint: disable=attribute-defined-outside-init + self.train_data_collator + ) + return dataloader + + if self.args.sample_packing and self.args.eval_sample_packing is not False: + eval_dataset = ( + eval_dataset if eval_dataset is not None else self.eval_dataset + ) + + eval_sampler = self._get_eval_sampler(eval_dataset) + eval_dataset = eval_dataset.remove_columns(["length"]) + data_collator = self.data_collator + dataloader_params = { + "batch_size": self.args.eval_batch_size, + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + } + if self.args.dataloader_prefetch_factor: + dataloader_params[ + "prefetch_factor" + ] = self.args.dataloader_prefetch_factor + + if isinstance(eval_sampler, BatchSampler): + dataloader_params["batch_sampler"] = eval_sampler + del dataloader_params["batch_size"] + else: + dataloader_params["sampler"] = eval_sampler + dataloader_params["drop_last"] = self.args.dataloader_drop_last + + self.accelerator.even_batches = False + return self.accelerator.prepare_data_loader( + DataLoader(eval_dataset, **dataloader_params) + ) + + return super().get_eval_dataloader(eval_dataset) + + def _get_bench_sampler( + self, bench_dataset: Dataset + ) -> Optional[torch.utils.data.Sampler]: + if self.args.world_size <= 1: + return SequentialSampler(bench_dataset) + return None + + def get_bench_dataloader( + self, + bench_dataset: Dataset, + ) -> DataLoader: + dataloader_params = { + "batch_size": self.args.eval_batch_size, + "collate_fn": self.bench_data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + } + if self.args.dataloader_prefetch_factor: + dataloader_params["prefetch_factor"] = self.args.dataloader_prefetch_factor + + if not isinstance(bench_dataset, torch.utils.data.IterableDataset): + dataloader_params["sampler"] = self._get_bench_sampler(bench_dataset) + dataloader_params["drop_last"] = self.args.dataloader_drop_last + + return DataLoader(bench_dataset, **dataloader_params) + # return self.accelerator.prepare(DataLoader(bench_dataset, **dataloader_params)) + + def compute_loss(self, model, inputs, return_outputs=False): + # use one's weighted cross entropy loss calc + # if self.args.sample_packing: + # labels = inputs.pop("labels") + # outputs = model(**inputs) + # loss = trainer_weighted_loss(outputs, labels, shift_labels=True) + # return (loss, outputs) if 
return_outputs else loss + if self.args.orpo_alpha: + return self.orpo_compute_loss(model, inputs, return_outputs=return_outputs) + return super().compute_loss(model, inputs, return_outputs=return_outputs) + + @staticmethod + def orpo_concatenate_inputs(inputs, label_pad_token=-100, pad_token=0, device=None): + concatenated_batch = {} + + max_length = max( + inputs["input_ids"].shape[1], inputs["rejected_input_ids"].shape[1] + ) + # Concatenate positive and negative inputs + concatenated_batch["input_ids"] = pad_to_length( + inputs["input_ids"], max_length, pad_token + ) + concatenated_batch["rejected_input_ids"] = pad_to_length( + inputs["rejected_input_ids"], max_length, pad_token + ) + concatenated_batch["labels"] = pad_to_length( + inputs["labels"], max_length, label_pad_token + ) + concatenated_batch["rejected_labels"] = pad_to_length( + inputs["rejected_labels"], max_length, label_pad_token + ) + concatenated_batch["attention_mask"] = pad_to_length( + inputs["attention_mask"], max_length, 0 + ) + concatenated_batch["rejected_attention_mask"] = pad_to_length( + inputs["rejected_attention_mask"], max_length, 0 + ) + concatenated_batch["prompt_attention_mask"] = pad_to_length( + inputs["prompt_attention_mask"], max_length, 0 + ).to(device=device) + + input_ids = torch.cat( + [concatenated_batch["input_ids"], concatenated_batch["rejected_input_ids"]], + dim=0, + ).to(device=device) + attention_mask = torch.cat( + [ + concatenated_batch["attention_mask"], + concatenated_batch["rejected_attention_mask"], + ], + dim=0, + ).to(device=device) + labels = torch.cat( + [concatenated_batch["labels"], concatenated_batch["rejected_labels"]], dim=0 + ).to(device=device) + + return { + "input_ids": input_ids, + "labels": labels, + "attention_mask": attention_mask, + "prompt_attention_mask": concatenated_batch["prompt_attention_mask"], + } + + def orpo_compute_custom_loss(self, logits, labels): + logits = logits.contiguous() + loss = 0.0 + + if labels is not None: + # move labels to correct device to enable model parallelism + labels = labels.to(logits.device) + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + + # Flatten the tokens + loss = self.loss_fct(shift_logits.transpose(2, 1), shift_labels).mean( + dim=-1 + ) + + return loss + + def orpo_compute_logps( + self, prompt_attention_mask, chosen_inputs, chosen_attention_mask, logits + ): + # Get the shape of chosen_attention_mask[:, :-1] + chosen_shape = chosen_attention_mask[:, :-1].shape + + # Calculate the padding size + pad_length = chosen_shape[1] - (prompt_attention_mask.shape[1] - 1) + + # Pad prompt_attention_mask with zeros to match the desired shape + prompt_attention_mask_padded = torch.nn.functional.pad( + prompt_attention_mask[:, 1:], (0, pad_length), mode="constant", value=0 + ) + + # Perform the subtraction operation + mask = chosen_attention_mask[:, :-1] > prompt_attention_mask_padded + + per_token_logps = torch.gather( + logits[:, :-1, :].log_softmax(-1), + dim=2, + index=(mask * chosen_inputs[:, 1:]).unsqueeze(2), + ).squeeze(2) + return torch.mul(per_token_logps, mask).sum(dim=1) / mask.sum(dim=1) + + def orpo_compute_loss(self, model, inputs, return_outputs=False): + concat_inputs = AxolotlTrainer.orpo_concatenate_inputs( + inputs, + label_pad_token=-100, + pad_token=self.tokenizer.pad_token_id, + device=self.accelerator.device, + ) + + # Perform a single forward pass + outputs = model( + **{ + "input_ids": concat_inputs["input_ids"], + 
"attention_mask": concat_inputs["attention_mask"], + "labels": concat_inputs["labels"], + }, + output_hidden_states=True, + ) + + # Split the outputs for positive and negative examples + outputs_pos, outputs_neg = outputs.logits.chunk(2) + + # Calculate NLL loss + pos_loss = self.orpo_compute_custom_loss( + logits=outputs_pos, labels=concat_inputs["input_ids"].chunk(2)[0] + ) + + # Calculate Log Probability + pos_prob = self.orpo_compute_logps( + prompt_attention_mask=concat_inputs["prompt_attention_mask"], + chosen_inputs=concat_inputs["input_ids"].chunk(2)[0], + chosen_attention_mask=concat_inputs["attention_mask"].chunk(2)[0], + logits=outputs_pos, + ) + neg_prob = self.orpo_compute_logps( + prompt_attention_mask=concat_inputs["prompt_attention_mask"], + chosen_inputs=concat_inputs["input_ids"].chunk(2)[1], + chosen_attention_mask=concat_inputs["attention_mask"].chunk(2)[1], + logits=outputs_neg, + ) + + # Calculate log odds + log_odds = (pos_prob - neg_prob) - ( + torch.log(1 - torch.exp(pos_prob)) - torch.log(1 - torch.exp(neg_prob)) + ) + sig_ratio = torch.nn.functional.sigmoid(log_odds) + ratio = torch.log(sig_ratio) + + # Calculate the Final Loss + loss = torch.mean(pos_loss - self.args.orpo_alpha * ratio).to( + dtype=torch.bfloat16 + ) + + metrics = {} + metrics["chosen_geometric_mean"] = torch.mean(pos_prob).cpu().item() + metrics["rejected_geometric_mean"] = torch.mean(neg_prob).cpu().item() + metrics["log_odds_ratio"] = torch.mean(ratio).cpu().item() + metrics["log_odds"] = torch.mean(log_odds).cpu().item() + self.store_metrics(metrics, train_eval="train") + + return (loss, outputs_pos) if return_outputs else loss + + @wraps(Trainer.push_to_hub) + def push_to_hub(self, *args, **kwargs) -> str: + """ + Overwrite the `push_to_hub` method in order to force-add the tags when pushing the + model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details. + """ + kwargs = _sanitize_kwargs_for_tagging(tag_names=self.tag_names, kwargs=kwargs) + + return super().push_to_hub(*args, **kwargs) + + @wraps(Trainer.create_accelerator_and_postprocess) + def create_accelerator_and_postprocess(self): + res = super().create_accelerator_and_postprocess() + + if self.is_fsdp_enabled: + if ( + "limit_all_gathers" in self.args.fsdp_config + and self.args.fsdp_config["limit_all_gathers"] + ): + self.accelerator.state.fsdp_plugin.limit_all_gathers = True + + return res + + def log(self, logs: Dict[str, float]) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`Dict[str, float]`): + The values to log. 
+ """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[key] = torch.tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs) + + def store_metrics( + self, metrics: Dict[str, float], train_eval: Literal["train", "eval"] = "train" + ) -> None: + for key, value in metrics.items(): + self._stored_metrics[train_eval][key].append(value) + + +class AxolotlMambaTrainer(AxolotlTrainer): + """ + Mamba specific trainer to handle loss calculation + """ + + tag_names = ["axolotl", "mamba"] + + def compute_loss( + self, + model, + inputs, + return_outputs=False, # pylint: disable=unused-argument + ): + input_ids = inputs.pop("input_ids") + lm_logits = model(input_ids).logits + + labels = input_ids.to(lm_logits.device) + shift_logits = lm_logits[:, :-1, :].contiguous() + labels = labels[:, 1:].contiguous() + + loss_fct = torch.nn.CrossEntropyLoss() + lm_loss = loss_fct( + shift_logits.view(-1, shift_logits.size(-1)), labels.view(-1) + ) + + return lm_loss + + +class OneCycleLRSchedulerTrainer(AxolotlTrainer): + """ + Trainer subclass that uses the OneCycleLR scheduler + """ + + tag_names = ["axolotl", "onecycle"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.lr_scheduler = None + + def create_scheduler( + self, + num_training_steps: int, + optimizer: Optional[torch.optim.Optimizer] = None, + ): + optimizer = self.optimizer if optimizer is None else optimizer + num_warmup_steps = self.args.get_warmup_steps(num_training_steps) + pct_start = num_warmup_steps / num_training_steps + + self.lr_scheduler = OneCycleLR( + optimizer, + max_lr=self.args.learning_rate, + total_steps=num_training_steps, + pct_start=pct_start, + div_factor=6, + ) + + return self.lr_scheduler + + +class ReLoRATrainer(AxolotlTrainer): + """ + Trainer subclass that uses the OneCycleLR scheduler + """ + + tag_names = ["axolotl", "relora"] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.lr_scheduler = None + + def create_scheduler( + self, + num_training_steps: int, + optimizer: Optional[torch.optim.Optimizer] = None, + ): + optimizer = self.optimizer if optimizer is None else optimizer + lr_scheduler = super().create_scheduler(num_training_steps, optimizer) + + if self.args.relora_steps: + warmup_steps = ( + self.args.relora_warmup_steps if self.args.relora_warmup_steps else 10 + ) + anneal_steps = ( + self.args.relora_anneal_steps if self.args.relora_anneal_steps else 1 + ) + self.lr_scheduler = ReLoRAScheduler( + optimizer, + lr_scheduler, + self.args.relora_steps, + anneal_steps, + warmup_steps, + ) + else: + self.lr_scheduler = lr_scheduler + + return self.lr_scheduler + + +class AxolotlDPOTrainer(DPOTrainer): + """ + Extend the base DPOTrainer for axolotl helpers + """ + + tag_names = ["axolotl", "dpo"] + + @wraps(DPOTrainer.push_to_hub) + def push_to_hub(self, *args, **kwargs) -> str: + """ + Overwrite the `push_to_hub` method in order to force-add the tags when pushing the + model on the Hub. Please refer to `~transformers.Trainer.push_to_hub` for more details. 
+ """ + kwargs = _sanitize_kwargs_for_tagging(tag_names=self.tag_names, kwargs=kwargs) + + return super().push_to_hub(*args, **kwargs) + + +class TrainerBuilderBase(abc.ABC): + """ + Base class for trainer builder + """ + + _train_dataset = None + _eval_dataset = None + _model_ref = None + _peft_config = None + + def __init__(self, cfg, model, tokenizer): + self.cfg = cfg + self.model = model + self.tokenizer = tokenizer + + # in case the model supports tagging, add the axolotl tag. + # This makes sure the tag is correctly pushed even if a user calls + # model.push_to_hub instad of trainer.push_to_hub. + if hasattr(model, "add_model_tags"): + model.add_model_tags(["axolotl"]) + + @property + def model_ref(self): + return self._model_ref + + @model_ref.setter + def model_ref(self, model): + self._model_ref = model + + @property + def train_dataset(self): + return self._train_dataset + + @train_dataset.setter + def train_dataset(self, dataset): + self._train_dataset = dataset + + @property + def eval_dataset(self): + return self._eval_dataset + + @eval_dataset.setter + def eval_dataset(self, dataset): + self._eval_dataset = dataset + + @property + def peft_config(self): + return self._peft_config + + @peft_config.setter + def peft_config(self, peft_config): + self._peft_config = peft_config + + @abstractmethod + def build(self, total_num_steps): + pass + + def get_callbacks(self) -> List[TrainerCallback]: + callbacks = [] + if self.cfg.use_wandb: + callbacks.append( + SaveAxolotlConfigtoWandBCallback(self.cfg.axolotl_config_path) + ) + + return callbacks + + @abstractmethod + def get_post_trainer_create_callbacks(self, trainer): + """ + Callbacks added after the trainer is created, usually b/c these need access to the trainer + """ + + def hook_pre_create_training_args(self, training_arguments_kwargs): + # TODO + return training_arguments_kwargs + + def hook_post_create_training_args(self, training_arguments): + # TODO + return training_arguments + + def hook_pre_create_trainer(self, trainer_kwargs, trainer_cls): + # TODO + return trainer_kwargs, trainer_cls + + def hook_post_create_trainer(self, trainer): + # TODO + return trainer + + +class HFCausalTrainerBuilder(TrainerBuilderBase): + """ + Build the HuggingFace training args/trainer for Causal models + """ + + def get_callbacks(self): + callbacks = super().get_callbacks() + callbacks.append(GPUStatsCallback(self.cfg)) + callbacks.append(EvalFirstStepCallback()) + + if self.cfg.relora_steps: + callbacks.append(ReLoRACallback(self.cfg)) + + if ( + hasattr(self.model, "use_bettertransformer") + and self.model.use_bettertransformer is True + ): + callbacks.append(SaveBetterTransformerModelCallback()) + + if self.cfg.use_wandb: + callbacks.append( + SaveAxolotlConfigtoWandBCallback(self.cfg.axolotl_config_path) + ) + if self.cfg.use_mlflow and is_mlflow_available(): + from axolotl.utils.callbacks.mlflow_ import ( + SaveAxolotlConfigtoMlflowCallback, + ) + + callbacks.append( + SaveAxolotlConfigtoMlflowCallback(self.cfg.axolotl_config_path) + ) + + if self.cfg.loss_watchdog_threshold is not None: + callbacks.append(LossWatchDogCallback(self.cfg)) + + return callbacks + + def get_post_trainer_create_callbacks(self, trainer): + callbacks = [] + if self.cfg.use_wandb and self.cfg.eval_table_size > 0: + LogPredictionCallback = log_prediction_callback_factory( + trainer, self.tokenizer + ) + callbacks.append(LogPredictionCallback(self.cfg)) + + if self.cfg.do_bench_eval: + callbacks.append(bench_eval_callback_factory(trainer, self.tokenizer)) + if 
self.cfg.do_causal_lm_eval: + CausalLMBenchEvalCallback = causal_lm_bench_eval_callback_factory( + trainer, self.tokenizer + ) + callbacks.append(CausalLMBenchEvalCallback(self.cfg)) + + if self.cfg.early_stopping_patience: + early_stop_cb = EarlyStoppingCallback( + self.cfg.early_stopping_patience, + ) + callbacks.append(early_stop_cb) + + return callbacks + + def _get_trainer_cls(self): + if self.cfg.lr_scheduler == "one_cycle" and ( + self.cfg.fsdp or self.cfg.adapter == "qlora" + ): + return OneCycleLRSchedulerTrainer + if self.cfg.relora_steps: + return ReLoRATrainer + if self.cfg.model_config_type == "mamba": + return AxolotlMambaTrainer + return AxolotlTrainer + + def build(self, total_num_steps): + warmup_steps = None + if self.cfg.warmup_steps is not None: + warmup_steps = self.cfg.warmup_steps + elif self.cfg.warmup_ratio is not None: + warmup_steps = max(int(self.cfg.warmup_ratio * total_num_steps), 0) + else: + warmup_steps = min(int(0.03 * total_num_steps), 100) + + logging_steps = ( + self.cfg.logging_steps + if self.cfg.logging_steps is not None + else max(min(int(0.005 * total_num_steps), 10), 1) + ) + + training_arguments_kwargs = {} + if self.cfg.bf16 == "full": + training_arguments_kwargs["bf16_full_eval"] = True + else: + training_arguments_kwargs["bf16"] = self.cfg.bf16 + training_arguments_kwargs["fp16"] = ( + self.cfg.fp16 and not self.cfg.bf16 + ) or False + training_arguments_kwargs["tf32"] = self.cfg.tf32 + training_arguments_kwargs["warmup_steps"] = warmup_steps + training_arguments_kwargs["logging_steps"] = logging_steps + + if self.cfg.seed: + training_arguments_kwargs["seed"] = self.cfg.seed + + if self.cfg.gradient_checkpointing: + training_arguments_kwargs[ + "gradient_checkpointing" + ] = self.cfg.gradient_checkpointing + if self.cfg.gradient_checkpointing_kwargs is not None: + training_arguments_kwargs[ + "gradient_checkpointing_kwargs" + ] = self.cfg.gradient_checkpointing_kwargs + if self.cfg.fsdp: + training_arguments_kwargs["fsdp"] = self.cfg.fsdp + if self.cfg.fsdp_config: + training_arguments_kwargs["fsdp_config"] = dict(self.cfg.fsdp_config) + + if self.cfg.adapter == "qlora": + training_arguments_kwargs["qlora"] = True + + # deepspeed + if self.cfg.deepspeed: + training_arguments_kwargs["deepspeed"] = self.cfg.deepspeed + + if self.cfg.lr_quadratic_warmup is not None: + training_arguments_kwargs[ + "lr_quadratic_warmup" + ] = self.cfg.lr_quadratic_warmup + + if self.cfg.adam_beta1: + training_arguments_kwargs["adam_beta1"] = self.cfg.adam_beta1 + if self.cfg.adam_beta2: + training_arguments_kwargs["adam_beta2"] = self.cfg.adam_beta2 + if self.cfg.adam_epsilon: + training_arguments_kwargs["adam_epsilon"] = self.cfg.adam_epsilon + if self.cfg.max_grad_norm: + training_arguments_kwargs["max_grad_norm"] = self.cfg.max_grad_norm + + if self.cfg.hub_model_id: + training_arguments_kwargs["hub_model_id"] = self.cfg.hub_model_id + training_arguments_kwargs["push_to_hub"] = True + training_arguments_kwargs["hub_private_repo"] = True + training_arguments_kwargs["hub_always_push"] = True + + if self.cfg.hub_strategy: + training_arguments_kwargs["hub_strategy"] = self.cfg.hub_strategy + + if self.cfg.save_safetensors is not None: + training_arguments_kwargs["save_safetensors"] = self.cfg.save_safetensors + + if self.cfg.sample_packing_eff_est: + training_arguments_kwargs[ + "sample_packing_efficiency" + ] = self.cfg.sample_packing_eff_est + + if self.cfg.dataloader_pin_memory is not None: + training_arguments_kwargs[ + "dataloader_pin_memory" + ] = 
self.cfg.dataloader_pin_memory + if self.cfg.dataloader_num_workers is not None: + training_arguments_kwargs[ + "dataloader_num_workers" + ] = self.cfg.dataloader_num_workers + if self.cfg.dataloader_prefetch_factor is not None: + training_arguments_kwargs[ + "dataloader_prefetch_factor" + ] = self.cfg.dataloader_prefetch_factor + if self.cfg.dataloader_drop_last is not None: + training_arguments_kwargs[ + "dataloader_drop_last" + ] = self.cfg.dataloader_drop_last + elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False: + training_arguments_kwargs["dataloader_drop_last"] = True + + if self.cfg.remove_unused_columns is not None: + training_arguments_kwargs[ + "remove_unused_columns" + ] = self.cfg.remove_unused_columns + + if not self.cfg.test_datasets and self.cfg.val_set_size == 0: + # no eval set, so don't eval + training_arguments_kwargs["evaluation_strategy"] = "no" + elif self.cfg.eval_steps: + training_arguments_kwargs["evaluation_strategy"] = "steps" + training_arguments_kwargs["eval_steps"] = self.cfg.eval_steps + elif self.cfg.evaluation_strategy: + training_arguments_kwargs[ + "evaluation_strategy" + ] = self.cfg.evaluation_strategy + else: + # we have an eval set, but no steps defined, default to use epoch + training_arguments_kwargs["evaluation_strategy"] = "epoch" + + if self.cfg.save_steps: + training_arguments_kwargs["save_strategy"] = "steps" + training_arguments_kwargs["save_steps"] = self.cfg.save_steps + elif self.cfg.save_strategy: + training_arguments_kwargs["save_strategy"] = self.cfg.save_strategy + else: + # default to saving each epoch if not defined + training_arguments_kwargs["save_strategy"] = "epoch" + + if self.cfg.do_bench_eval: + training_arguments_kwargs["do_bench_eval"] = self.cfg.do_bench_eval + if self.cfg.bench_dataset: + training_arguments_kwargs["bench_dataset"] = self.cfg.bench_dataset + if self.cfg.do_causal_lm_eval: + training_arguments_kwargs["do_causal_lm_eval"] = self.cfg.do_causal_lm_eval + if self.cfg.metric_for_best_model: + training_arguments_kwargs[ + "metric_for_best_model" + ] = self.cfg.metric_for_best_model + if self.cfg.greater_is_better: + training_arguments_kwargs["greater_is_better"] = self.cfg.greater_is_better + + if self.cfg.torch_compile: + if torch.__version__ < "2.1.0": # pylint: disable=protected-access + LOG.warning("torch>=2.1.0 required for torch_compile to work properly") + elif torch._dynamo: # pylint: disable=protected-access + torch._dynamo.config.suppress_errors = ( # pylint: disable=protected-access + True + ) + training_arguments_kwargs["torch_compile"] = self.cfg.torch_compile + if self.cfg.torch_compile_backend: + training_arguments_kwargs[ + "torch_compile_backend" + ] = self.cfg.torch_compile_backend + + # DDP Config + if self.cfg.ddp_timeout: + training_arguments_kwargs["ddp_timeout"] = self.cfg.ddp_timeout + # see https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html + if self.cfg.ddp_bucket_cap_mb: + training_arguments_kwargs["ddp_bucket_cap_mb"] = self.cfg.ddp_bucket_cap_mb + if self.cfg.ddp_broadcast_buffers is not None: + training_arguments_kwargs[ + "ddp_broadcast_buffers" + ] = self.cfg.ddp_broadcast_buffers + + # these are all the "standard" kwargs that are def used + training_arguments_kwargs["max_steps"] = ( + total_num_steps if self.cfg.max_steps else -1 + ) + training_arguments_kwargs["max_seq_length"] = self.cfg.sequence_len + training_arguments_kwargs[ + "per_device_train_batch_size" + ] = self.cfg.micro_batch_size + if self.cfg.eval_batch_size: 
+ training_arguments_kwargs[ + "per_device_eval_batch_size" + ] = self.cfg.eval_batch_size + training_arguments_kwargs[ + "gradient_accumulation_steps" + ] = self.cfg.gradient_accumulation_steps + training_arguments_kwargs[ + "eval_accumulation_steps" + ] = self.cfg.gradient_accumulation_steps + training_arguments_kwargs["num_train_epochs"] = self.cfg.num_epochs + training_arguments_kwargs["learning_rate"] = self.cfg.learning_rate + training_arguments_kwargs["output_dir"] = self.cfg.output_dir + training_arguments_kwargs["save_total_limit"] = ( + self.cfg.save_total_limit if self.cfg.save_total_limit else 4 + ) + training_arguments_kwargs["load_best_model_at_end"] = ( + ( + self.cfg.load_best_model_at_end is not False + or self.cfg.early_stopping_patience + ) + and ( + (not self.cfg.test_datasets and self.cfg.val_set_size > 0) + or (self.cfg.test_datasets and self.cfg.val_set_size == 0) + ) + and self.cfg.save_steps + and self.cfg.eval_steps + and self.cfg.save_steps % self.cfg.eval_steps == 0 + ) or False + training_arguments_kwargs["ddp_find_unused_parameters"] = ( + False if self.cfg.ddp else None + ) + training_arguments_kwargs["group_by_length"] = self.cfg.group_by_length + report_to = None + if self.cfg.use_wandb: + report_to = "wandb" + if self.cfg.use_mlflow: + report_to = "mlflow" + training_arguments_kwargs["report_to"] = report_to + training_arguments_kwargs["run_name"] = ( + self.cfg.wandb_name if self.cfg.use_wandb else None + ) + training_arguments_kwargs["optim"] = ( + self.cfg.optimizer if self.cfg.optimizer else "adamw_hf" + ) + if self.cfg.optim_args: + if isinstance(self.cfg.optim_args, dict): + optim_args = ",".join( + [f"{key}={value}" for key, value in self.cfg.optim_args.items()] + ) + else: + optim_args = self.cfg.optim_args + training_arguments_kwargs["optim_args"] = optim_args + if self.cfg.optim_target_modules: + training_arguments_kwargs[ + "optim_target_modules" + ] = self.cfg.optim_target_modules + training_arguments_kwargs["loraplus_lr_ratio"] = self.cfg.loraplus_lr_ratio + training_arguments_kwargs[ + "loraplus_lr_embedding" + ] = self.cfg.loraplus_lr_embedding + training_arguments_kwargs["lr_scheduler_type"] = ( + self.cfg.lr_scheduler + if self.cfg.lr_scheduler + and self.cfg.lr_scheduler not in ("one_cycle", "log_sweep") + else "cosine" + ) + training_arguments_kwargs["lr_scheduler_kwargs"] = ( + self.cfg.lr_scheduler_kwargs if self.cfg.lr_scheduler_kwargs else {} + ) + training_arguments_kwargs["cosine_min_lr_ratio"] = self.cfg.cosine_min_lr_ratio + training_arguments_kwargs[ + "cosine_constant_lr_ratio" + ] = self.cfg.cosine_constant_lr_ratio + training_arguments_kwargs["weight_decay"] = ( + self.cfg.weight_decay if self.cfg.weight_decay is not None else 0.0 + ) + training_arguments_kwargs["sample_packing"] = ( + self.cfg.sample_packing if self.cfg.sample_packing else False + ) + training_arguments_kwargs["multipack_real_batches"] = ( + self.cfg.flash_attention is not True + ) + training_arguments_kwargs["eval_sample_packing"] = ( + self.cfg.sample_packing + if self.cfg.eval_sample_packing is not False + else False + ) + training_arguments_kwargs[ + "sample_packing_seq_len_multiplier" + ] = self.cfg.micro_batch_size + if self.cfg.relora_steps: + training_arguments_kwargs["relora_steps"] = self.cfg.relora_steps + training_arguments_kwargs[ + "relora_warmup_steps" + ] = self.cfg.relora_warmup_steps + if self.cfg.relora_anneal_steps: + training_arguments_kwargs[ + "relora_anneal_steps" + ] = self.cfg.relora_anneal_steps + if self.cfg.relora_prune_ratio: + 
training_arguments_kwargs[ + "relora_prune_ratio" + ] = self.cfg.relora_prune_ratio + + training_arguments_kwargs = self.hook_pre_create_training_args( + training_arguments_kwargs + ) + training_arguments_kwargs["model_type"] = self.cfg.model_config_type + training_arguments_kwargs["pretraining"] = bool(self.cfg.pretraining_dataset) + + if self.cfg.rl == "orpo": + training_arguments_kwargs["orpo_alpha"] = self.cfg.orpo_alpha + + if self.cfg.neftune_noise_alpha is not None: + training_arguments_kwargs[ + "neftune_noise_alpha" + ] = self.cfg.neftune_noise_alpha + + trainer_kwargs = {} + + if self.cfg.optimizer == "lion_pytorch": + from lion_pytorch import Lion + + lion_kwargs = {"lr": training_arguments_kwargs["learning_rate"]} + if "weight_decay" in training_arguments_kwargs: + lion_kwargs["weight_decay"] = training_arguments_kwargs["weight_decay"] + + if ( + "adam_beta1" in training_arguments_kwargs + and "adam_beta2" in training_arguments_kwargs + ): + lion_kwargs["betas"] = ( + training_arguments_kwargs["adam_beta1"], + training_arguments_kwargs["adam_beta2"], + ) + + trainer_kwargs["optimizers"] = ( + Lion(params=self.model.parameters(), **lion_kwargs), + None, + ) + # Set default so transformers doesn't throw + training_arguments_kwargs["optim"] = "adamw_hf" + + if self.cfg.optimizer == "adamw_anyprecision": + if Path(self.cfg.torchdistx_path).exists(): + sys.path.append(self.cfg.torchdistx_path) + importlib.import_module("torchdistx") + + training_args = ( + AxolotlTrainingArguments( # pylint: disable=unexpected-keyword-arg + **training_arguments_kwargs, + ) + ) + training_args = self.hook_post_create_training_args(training_args) + + data_collator_kwargs = { + "padding": True, # True/"longest" is the default + } + if self.cfg.pad_to_sequence_len: + data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil( + self.cfg.sequence_len / 64 + ) + else: + # A100 is best at 64, while others at 8. 
Let's use the larger so we don't have to check + # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html + data_collator_kwargs["pad_to_multiple_of"] = 64 + + trainer_cls = self._get_trainer_cls() + trainer_kwargs, trainer_cls = self.hook_pre_create_trainer( + trainer_kwargs, trainer_cls + ) + trainer = trainer_cls( + model=self.model, + train_dataset=self.train_dataset, + eval_dataset=self.eval_dataset, + args=training_args, + tokenizer=self.tokenizer, + data_collator=self.build_collator(training_args, **data_collator_kwargs), + eval_data_collator=self.build_collator( + training_args, is_eval=True, **data_collator_kwargs + ), + bench_data_collator=transformers.DataCollatorForSeq2Seq( + self.tokenizer, + return_tensors="pt", + **data_collator_kwargs, + ), + callbacks=self.get_callbacks(), + num_epochs=self.cfg.num_epochs, + **trainer_kwargs, + ) + trainer = self.hook_post_create_trainer(trainer) + for callback in self.get_post_trainer_create_callbacks(trainer): + trainer.add_callback(callback) + + if self.cfg.deepspeed and self.cfg.sample_packing: + trainer.accelerator.state.deepspeed_plugin.deepspeed_config[ + "train_micro_batch_size_per_gpu" + ] = self.cfg.micro_batch_size + + return trainer + + def build_collator( + self, training_args: AxolotlTrainingArguments, is_eval=False, **kwargs + ): + if training_args.pretraining: + return None + + if self.cfg.model_config_type == "mamba": + return MambaDataCollator(tokenizer=self.tokenizer) + + use_batch_sampler_collator = False + if is_eval is False and training_args.sample_packing: + use_batch_sampler_collator = True + if is_eval and training_args.eval_sample_packing: + use_batch_sampler_collator = True + + collator: Type[ + Union[ + V2BatchSamplerDataCollatorForSeq2Seq, + BatchSamplerDataCollatorForSeq2Seq, + DataCollatorForSeq2Seq, + ] + ] + if use_batch_sampler_collator: + if self.cfg.model_config_type in SUPPORTED_MULTIPACK_MODEL_TYPES: + collator = V2BatchSamplerDataCollatorForSeq2Seq + elif ( + self.cfg.model_config_type in ["llama"] + and self.cfg.flash_attention is not True + ): + collator = V2BatchSamplerDataCollatorForSeq2Seq + else: + collator = BatchSamplerDataCollatorForSeq2Seq + else: + collator = DataCollatorForSeq2Seq + + return collator( + self.tokenizer, + return_tensors="pt", + **kwargs, + ) + + +class HFDPOTrainerBuilder(TrainerBuilderBase): + """ + Trainer factory class for DPO Trainer + """ + + def get_callbacks(self): + callbacks = super().get_callbacks() + return callbacks + + def get_post_trainer_create_callbacks(self, trainer): + callbacks = [] + return callbacks + + def build_training_arguments(self, total_num_steps): + training_args_kwargs = {} + for arg in [ + "adam_beta1", + "adam_beta2", + "adam_epsilon", + "dataloader_num_workers", + "dataloader_pin_memory", + ]: + if hasattr(self.cfg, arg) and getattr(self.cfg, arg) is not None: + training_args_kwargs[arg] = getattr(self.cfg, arg) + + if self.cfg.hub_model_id: + training_args_kwargs["hub_model_id"] = self.cfg.hub_model_id + training_args_kwargs["push_to_hub"] = True + training_args_kwargs["hub_private_repo"] = True + training_args_kwargs["hub_always_push"] = True + + if self.cfg.hub_strategy: + training_args_kwargs["hub_strategy"] = self.cfg.hub_strategy + + if self.cfg.save_safetensors is not None: + training_args_kwargs["save_safetensors"] = self.cfg.save_safetensors + + if self.eval_dataset: + training_args_kwargs["evaluation_strategy"] = "steps" + training_args_kwargs["eval_steps"] = self.cfg.eval_steps + else: + 
training_args_kwargs["evaluation_strategy"] = "no" + if self.cfg.bf16 or self.cfg.bfloat16: + training_args_kwargs["bf16"] = True + + training_args_kwargs["lr_scheduler_type"] = ( + self.cfg.lr_scheduler if self.cfg.lr_scheduler else "cosine" + ) + training_args_kwargs["lr_scheduler_kwargs"] = ( + self.cfg.lr_scheduler_kwargs if self.cfg.lr_scheduler_kwargs else {} + ) + if self.cfg.remove_unused_columns is not None: + training_args_kwargs[ + "remove_unused_columns" + ] = self.cfg.remove_unused_columns + else: + training_args_kwargs["remove_unused_columns"] = False + + if self.cfg.dataloader_pin_memory is not None: + training_args_kwargs[ + "dataloader_pin_memory" + ] = self.cfg.dataloader_pin_memory + if self.cfg.dataloader_num_workers is not None: + training_args_kwargs[ + "dataloader_num_workers" + ] = self.cfg.dataloader_num_workers + if self.cfg.dataloader_prefetch_factor is not None: + training_args_kwargs[ + "dataloader_prefetch_factor" + ] = self.cfg.dataloader_prefetch_factor + if self.cfg.gradient_checkpointing: + training_args_kwargs[ + "gradient_checkpointing" + ] = self.cfg.gradient_checkpointing + if self.cfg.gradient_checkpointing_kwargs is not None: + training_args_kwargs[ + "gradient_checkpointing_kwargs" + ] = self.cfg.gradient_checkpointing_kwargs + else: + training_args_kwargs["gradient_checkpointing_kwargs"] = { + "use_reentrant": False + } + + # set save_strategy and save_steps + if self.cfg.save_steps: + training_args_kwargs["save_strategy"] = "steps" + training_args_kwargs["save_steps"] = self.cfg.save_steps + elif self.cfg.save_strategy: + training_args_kwargs["save_strategy"] = self.cfg.save_strategy + else: + # default to saving each epoch if not defined + training_args_kwargs["save_strategy"] = "epoch" + + training_args = TrainingArguments( + per_device_train_batch_size=self.cfg.micro_batch_size, + max_steps=self.cfg.max_steps or total_num_steps, + gradient_accumulation_steps=self.cfg.gradient_accumulation_steps, + learning_rate=self.cfg.learning_rate, + output_dir=self.cfg.output_dir, + warmup_steps=self.cfg.warmup_steps, + logging_first_step=True, + logging_steps=1, + optim=self.cfg.optimizer, + save_total_limit=self.cfg.save_total_limit or 5, + **training_args_kwargs, + ) + + return training_args + + def build(self, total_num_steps): + training_args = self.build_training_arguments(total_num_steps) + dpo_trainer_kwargs = {} + if self.cfg.rl == "ipo": + dpo_trainer_kwargs["loss_type"] = "ipo" + if self.cfg.dpo_label_smoothing: + dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing + elif self.cfg.rl == "kto_pair": + dpo_trainer_kwargs["loss_type"] = "kto_pair" + if self.eval_dataset: + dpo_trainer_kwargs["eval_dataset"] = self.eval_dataset + if self.cfg.adapter and self.peft_config: + dpo_trainer_kwargs["peft_config"] = self.peft_config + if self.cfg.precompute_ref_log_probs is not None: + dpo_trainer_kwargs[ + "precompute_ref_log_probs" + ] = self.cfg.precompute_ref_log_probs + dpo_trainer = AxolotlDPOTrainer( + self.model, + self.model_ref, + args=training_args, + beta=self.cfg.dpo_beta or 0.1, + train_dataset=self.train_dataset, + tokenizer=self.tokenizer, + max_length=self.cfg.sequence_len, + max_target_length=None, + max_prompt_length=self.cfg.sequence_len, + generate_during_eval=True, + callbacks=self.get_callbacks(), + **dpo_trainer_kwargs, + ) + dpo_trainer = self.hook_post_create_trainer(dpo_trainer) + for callback in self.get_post_trainer_create_callbacks(dpo_trainer): + dpo_trainer.add_callback(callback) + + return dpo_trainer + + 
+class HFPPOTrainerBuilder(TrainerBuilderBase): + """ + HF Factory class for PPO Trainer + """ + + def get_callbacks(self): + callbacks = [] + return callbacks + + def get_post_trainer_create_callbacks(self, trainer): + callbacks = [] + return callbacks + + def build(self, total_num_steps): + # build PPOConfig + pass diff --git a/src/axolotl/core/trainers/__init__.py b/src/axolotl/core/trainers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/core/trainers/trl.py b/src/axolotl/core/trainers/trl.py new file mode 100644 index 0000000000000000000000000000000000000000..24c0b041232f59e3df49b933bd1361000aa68b72 --- /dev/null +++ b/src/axolotl/core/trainers/trl.py @@ -0,0 +1,66 @@ +""" +module for TRL PPO training +""" +import torch +from tqdm import tqdm +from trl import PPOTrainer + + +class TRLPPOTrainer(PPOTrainer): + """ + wrapper for ppo trainer to handle customizations + """ + + def train( + self, + reward_pipe, + resume_from_checkpoint=None, # pylint: disable=unused-argument + ): + generation_kwargs = { + "min_length": -1, + "top_k": 0.0, + "top_p": 1.0, + "do_sample": True, + "pad_token_id": self.tokenizer.eos_token_id, + "max_new_tokens": 32, + } + sent_kwargs = { + "return_all_scores": True, + "function_to_apply": "none", + "batch_size": 16, + } + + for epoch, batch in tqdm( # pylint: disable=unused-variable + enumerate(self.dataloader) + ): + query_tensors = batch["input_ids"] + + # generate model response + response_tensors, ref_response_tensors = self.generate( + query_tensors, + return_prompt=False, + generate_ref_response=True, + **generation_kwargs + ) + batch["response"] = self.tokenizer.batch_decode(response_tensors) + batch["ref_response"] = self.tokenizer.batch_decode(ref_response_tensors) + + # Compute sentiment score + texts = [q + r for q, r in zip(batch["query"], batch["response"])] + pipe_outputs = reward_pipe(texts, **sent_kwargs) + rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs] + ref_texts = [q + r for q, r in zip(batch["query"], batch["ref_response"])] + ref_pipe_outputs = reward_pipe(ref_texts, **sent_kwargs) + ref_rewards = [ + torch.tensor(output[1]["score"]) for output in ref_pipe_outputs + ] + batch["ref_rewards"] = ref_rewards + + # Run PPO step + stats = self.step(query_tensors, response_tensors, rewards) + self.log_stats( + stats, + batch, + rewards, + columns_to_log=["query", "response", "ref_response", "ref_rewards"], + ) diff --git a/src/axolotl/datasets.py b/src/axolotl/datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..b5638a614d56d44f2749e2ceaf494a6ffda656ac --- /dev/null +++ b/src/axolotl/datasets.py @@ -0,0 +1,188 @@ +"""Module containing Dataset functionality""" + +import logging +import os +from typing import List, Optional + +import torch +from datasets import Dataset, IterableDataset + +from .prompt_tokenizers import PromptTokenizingStrategy + +# We want this to be a wrapper for an existing dataset that we have loaded +# lets use the concept of middlewares to wrap each dataset, for example +# ConstantLengthDataset(ShuffledDataset([TokenizedPromptDataset(alpaca_dataset)])) +# let's check to ensure we don't truncate an item in the middle, we'll use +# the collators later on to pad the datasets + +LOG = logging.getLogger("axolotl") + + +class TokenizedPromptDataset(Dataset): + """ + Dataset that returns tokenized prompts from a stream of text files. 
+ Args: + prompt_tokenizer (PromptTokenizingStrategy): The prompt tokenizing method for processing the data. + dataset (dataset.Dataset): Dataset with text files. + process_count (int): Number of processes to use for tokenizing. + keep_in_memory (bool): Whether to keep the tokenized dataset in memory. + """ + + def __init__( # pylint: disable=super-init-not-called + self, + prompt_tokenizer: PromptTokenizingStrategy, + dataset: Dataset, + process_count: Optional[int] = None, + keep_in_memory: Optional[bool] = False, + **kwargs, + ): + self.prompt_tokenizer = prompt_tokenizer + self.process_count = process_count + self.keep_in_memory = keep_in_memory + super().__init__( + self.process(dataset).data, + **kwargs, + ) + + def process(self, dataset): + features = dataset.features.keys() + num_proc = min(64, self.process_count if self.process_count else os.cpu_count()) + + map_kwargs = {} + if self.prompt_tokenizer.supports_batched: + map_kwargs["batched"] = True + map_kwargs["batch_size"] = 100 + return dataset.map( + self.prompt_tokenizer.tokenize_prompt, + num_proc=num_proc, + remove_columns=features, + keep_in_memory=self.keep_in_memory, + desc="Tokenizing Prompts", + **map_kwargs, + ) + + +# TODO this isn't the best since it can't interleave datasets +class ConstantLengthDataset(IterableDataset): + """ + Iterable dataset that returns constant length chunks of tokens from stream of text files. + Args: + tokenizer (Tokenizer): The processor used for processing the data. + dataset (dataset.Dataset): Dataset with text files. + seq_length (int): Length of token sequences to return. + """ + + def __init__( # pylint: disable=super-init-not-called + self, + tokenizer, + datasets, + seq_length=2048, + ): + self.tokenizer = tokenizer + self.concat_token_id = tokenizer.eos_token_id + self.datasets: List[IterableDataset] = datasets + self.seq_length = seq_length + + vocab_size = len(tokenizer.get_vocab()) + + if vocab_size <= torch.iinfo(torch.int16).max: + self.tokens_dtype = torch.int16 + elif vocab_size <= torch.iinfo(torch.int32).max: + self.tokens_dtype = torch.int32 + else: + self.tokens_dtype = torch.int64 + + def __iter__(self): + buffer = { + "input_ids": [], + "attention_mask": [], + "labels": [], + "position_ids": [], + } + buffer_len = 0 + for dataset in self.datasets: + idx = 0 + iterator = iter(dataset) + more_examples = True + while more_examples: + try: + example = next(iterator) + idx += 1 + except StopIteration: + more_examples = False + example = None + + add_concat_token = False + if example: + example_len = len(example["input_ids"]) + add_concat_token = example["input_ids"][-1] != self.concat_token_id + else: + example_len = 0 + + if not example_len or ( + buffer_len + int(add_concat_token) + example_len > self.seq_length + ): + if buffer["input_ids"]: + input_ids = torch.cat(buffer["input_ids"], dim=-1)[ + : self.seq_length + ] + attention_mask = torch.cat(buffer["attention_mask"], dim=-1)[ + : self.seq_length + ] + position_ids = torch.cat(buffer["position_ids"], dim=-1)[ + : self.seq_length + ] + labels = torch.cat(buffer["labels"], dim=-1)[: self.seq_length] + if labels.size() == input_ids.size() and ( + attention_mask.size() == input_ids.size() + ): + yield { + "input_ids": input_ids, + "labels": labels, + "attention_mask": attention_mask, + "position_ids": position_ids, + } + else: + LOG.warning( + f"dropping batch due to tensor size mismatch input_ids: {input_ids.size()}, labels: {labels.size()}, attention_mask: {attention_mask.size()}" + ) + buffer = { + "input_ids": [], + 
"attention_mask": [], + "labels": [], + "position_ids": [], + } + buffer_len = 0 + idx = 1 + + if example: + # FIXME + # just going to drop data points that are too long + if len(example["input_ids"]) <= self.seq_length: + input_ids = example["input_ids"] + attention_mask = example["attention_mask"] + labels = example["labels"] + + if add_concat_token: + input_ids.append(self.concat_token_id) + attention_mask.append(1) + labels.append(self.concat_token_id) + + input_ids_with_concat = torch.tensor( + input_ids, dtype=self.tokens_dtype + ) + attention_mask_with_concat = torch.tensor( + [idx * m for m in attention_mask], dtype=torch.int16 + ) + labels_with_concat = torch.tensor( + labels, dtype=self.tokens_dtype + ) + position_ids = torch.arange( + len(input_ids), dtype=self.tokens_dtype + ) + + buffer["input_ids"].append(input_ids_with_concat) + buffer["attention_mask"].append(attention_mask_with_concat) + buffer["labels"].append(labels_with_concat) + buffer["position_ids"].append(position_ids) + buffer_len += len(input_ids) diff --git a/src/axolotl/logging_config.py b/src/axolotl/logging_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2ddf89a8c4b0efe68389e7776b21a3180c9c4191 --- /dev/null +++ b/src/axolotl/logging_config.py @@ -0,0 +1,72 @@ +""" +Common logging module for axolotl +""" + +import os +import sys +from logging import Formatter +from logging.config import dictConfig +from typing import Any, Dict + +from colorama import Fore, Style, init + + +class ColorfulFormatter(Formatter): + """ + Formatter to add coloring to log messages by log type + """ + + COLORS = { + "WARNING": Fore.YELLOW, + "ERROR": Fore.RED, + "CRITICAL": Fore.RED + Style.BRIGHT, + } + + def format(self, record): + record.rank = int(os.getenv("LOCAL_RANK", "0")) + log_message = super().format(record) + return self.COLORS.get(record.levelname, "") + log_message + Fore.RESET + + +DEFAULT_LOGGING_CONFIG: Dict[str, Any] = { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "simple": { + "format": "[%(asctime)s] [%(levelname)s] [%(name)s.%(funcName)s:%(lineno)d] [PID:%(process)d] %(message)s", + }, + "colorful": { + "()": ColorfulFormatter, + "format": "[%(asctime)s] [%(levelname)s] [%(name)s.%(funcName)s:%(lineno)d] [PID:%(process)d] [RANK:%(rank)d] %(message)s", + }, + }, + "filters": {}, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "simple", + "filters": [], + "stream": sys.stdout, + }, + "color_console": { + "class": "logging.StreamHandler", + "formatter": "colorful", + "filters": [], + "stream": sys.stdout, + }, + }, + "root": {"handlers": ["console"], "level": os.getenv("LOG_LEVEL", "INFO")}, + "loggers": { + "axolotl": { + "handlers": ["color_console"], + "level": "DEBUG", + "propagate": False, + }, + }, +} + + +def configure_logging(): + """Configure with default logging""" + init() # Initialize colorama + dictConfig(DEFAULT_LOGGING_CONFIG) diff --git a/src/axolotl/loraplus.py b/src/axolotl/loraplus.py new file mode 100644 index 0000000000000000000000000000000000000000..b4abec55ad4aa4669718f4a418a27fcdb99f6fd3 --- /dev/null +++ b/src/axolotl/loraplus.py @@ -0,0 +1,133 @@ +"""Module for LoRA+""" + +# MIT License +# +# Copyright (c) 2024 nikhil-ghosh-berkeley +# https://github.com/nikhil-ghosh-berkeley/loraplus + +import logging +from functools import reduce + +from peft.tuners import lora +from torch import nn +from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS +from transformers.trainer_pt_utils import 
get_parameter_names + +LOG = logging.getLogger("axolotl.loraplus") + + +def get_module(name, opt_model): + """ + Retrieve a module from a model using its parameter name. + Args: + name (str): Full name of the parameter, typically including module path. + opt_model (torch.nn.Module): The model from which to retrieve the module. + + Returns: + Module corresponding to the given name. + """ + parent_idx = 2 if "lora" in name else 1 + module_names = name.split(sep=".")[:-parent_idx] + module = reduce(getattr, module_names, opt_model) + return module + + +def create_loraplus_optimizer( + opt_model, + optimizer_cls, + optimizer_kwargs, + loraplus_lr_ratio, + loraplus_lr_embedding=None, +): + """ + Creates an optimizer for the given model, applying LoRA-specific learning rate adjustments to different parameter groups. + + Args: + opt_model (torch.nn.Module): The model for which the optimizer is being created. + optimizer_cls (class): The class of the optimizer to be used (e.g., torch.optim.Adam). + optimizer_kwargs (dict): A dictionary of keyword arguments for the optimizer's initialization. + loraplus_lr_ratio (float): The learning rate ratio to be applied to LoRA parameters. + loraplus_lr_embedding (float, optional): A specific learning rate for embedding parameters, with a default value if not provided. + + Returns: + An instance of the specified optimizer class configured with the model's parameters organized into groups with custom learning rates. + """ + + assert loraplus_lr_ratio is not None, "loraplus_lr_ratio must be provided." + + if loraplus_lr_embedding is None: + loraplus_lr_embedding = 1e-6 + + decay_parameters = get_parameter_names(opt_model, ALL_LAYERNORM_LAYERS) + decay_parameters = [name for name in decay_parameters if "bias" not in name] + param_groups = { + "groupA": {}, + "groupB": {}, + "groupB_no_decay": {}, + "embedding": {}, + } + + for name, param in opt_model.named_parameters(): + if not param.requires_grad: + continue + + module = get_module(name, opt_model) + if isinstance(module, lora.Embedding): + param_groups["embedding"][name] = param + elif "lora_B" in name or param.ndim == 1: + if name in decay_parameters: + param_groups["groupB"][name] = param + else: + param_groups["groupB_no_decay"][name] = param + else: + param_groups["groupA"][name] = param + + assigned_param_groups = "" + for group, group_params in param_groups.items(): + assigned_param_groups += f"{group}\n {list(group_params.keys())}\n\n" + LOG.info(assigned_param_groups) + + lr = optimizer_kwargs["lr"] # pylint: disable=invalid-name + weight_decay = optimizer_kwargs.get("weight_decay", 0.0) + + optimizer_grouped_parameters = [ + { + "params": list(param_groups["groupA"].values()), + "weight_decay": weight_decay, + "lr": lr, + }, + { + "params": list(param_groups["embedding"].values()), + "weight_decay": weight_decay, + "lr": loraplus_lr_embedding, + }, + { + "params": list(param_groups["groupB"].values()), + "weight_decay": weight_decay, + "lr": lr * loraplus_lr_ratio, + }, + { + "params": list(param_groups["groupB_no_decay"].values()), + "weight_decay": 0.0, + "lr": lr * loraplus_lr_ratio, + }, + ] + + optimizer = optimizer_cls(optimizer_grouped_parameters, **optimizer_kwargs) + if optimizer_cls.__name__ == "Adam8bit": + import bitsandbytes + + manager = bitsandbytes.optim.GlobalOptimManager.get_instance() + + skipped = 0 + for module in opt_model.modules(): + if isinstance(module, nn.Embedding): + skipped += sum( + {p.data_ptr(): p.numel() for p in module.parameters()}.values() + ) + LOG.info(f"skipped 
{module}: {skipped/2**20}M params") + manager.register_module_override(module, "weight", {"optim_bits": 32}) + LOG.debug(f"bitsandbytes: will optimize {module} in fp32") + LOG.info(f"skipped: {skipped/2**20}M params") + + return optimizer diff --git a/src/axolotl/models/__init__.py b/src/axolotl/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/models/mamba/__init__.py b/src/axolotl/models/mamba/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fee88e3a434979b72f2c47d7741dbbdf21b1996d --- /dev/null +++ b/src/axolotl/models/mamba/__init__.py @@ -0,0 +1,24 @@ +""" +Modeling module for Mamba models +""" + +import importlib + + +def check_mamba_ssm_installed(): + mamba_ssm_spec = importlib.util.find_spec("mamba_ssm") + if mamba_ssm_spec is None: + raise ImportError( + "MambaLMHeadModel requires mamba_ssm. Please install it with `pip install -e .[mamba-ssm]`" + ) + + +def fix_mamba_attn_for_loss(): + check_mamba_ssm_installed() + + from mamba_ssm.models import mixer_seq_simple + + from .modeling_mamba import MambaLMHeadModel as MambaLMHeadModelFixed + + mixer_seq_simple.MambaLMHeadModel = MambaLMHeadModelFixed + return mixer_seq_simple.MambaLMHeadModel # pylint: disable=invalid-name diff --git a/src/axolotl/models/mamba/configuration_mamba.py b/src/axolotl/models/mamba/configuration_mamba.py new file mode 100644 index 0000000000000000000000000000000000000000..5160ee8d7e06a6d7c2dd3d0d9ae401ba3a288ef8 --- /dev/null +++ b/src/axolotl/models/mamba/configuration_mamba.py @@ -0,0 +1,42 @@ +""" +HF Transformers MambaConfig +""" +from transformers import PretrainedConfig + + +class MambaConfig(PretrainedConfig): + """ + modeling configuration for state space model/mamba + """ + + model_type = "mamba" + + def __init__( + self, + vocab_size=50280, + d_model=2560, + n_layer=64, + rms_norm=True, + residual_in_fp32=True, + fused_add_norm=True, + pad_vocab_size_multiple=8, + pad_token_id=50277, + bos_token_id=0, + eos_token_id=0, + tie_word_embeddings=False, + **kwargs, + ): + self.vocab_size = vocab_size + self.d_model = d_model + self.n_layer = n_layer + self.rms_norm = rms_norm + self.residual_in_fp32 = residual_in_fp32 + self.fused_add_norm = fused_add_norm + self.pad_vocab_size_multiple = pad_vocab_size_multiple + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) diff --git a/src/axolotl/models/mamba/modeling_mamba.py b/src/axolotl/models/mamba/modeling_mamba.py new file mode 100644 index 0000000000000000000000000000000000000000..70e9c88c882f595b849a63e2c274f6853ccc5ec2 --- /dev/null +++ b/src/axolotl/models/mamba/modeling_mamba.py @@ -0,0 +1,128 @@ +# pylint: skip-file +import os +from collections import namedtuple +from functools import partial +from typing import Optional, Union + +import torch +from mamba_ssm.models.mixer_seq_simple import MixerModel, _init_weights +from mamba_ssm.utils.generation import GenerationMixin +from mamba_ssm.utils.hf import load_config_hf, load_state_dict_hf +from torch import nn +from torch.nn import CrossEntropyLoss + +from axolotl.models.mamba.configuration_mamba import MambaConfig + + +class MambaLMHeadModel(nn.Module, GenerationMixin): + def __init__( + self, + d_model: int, + n_layer: int, + vocab_size: int, + initializer_cfg=None, + pad_vocab_size_multiple: int = 1, + device=None, + dtype=None, + **backbone_kwargs, + ) 
-> None: + factory_kwargs = {"device": device, "dtype": dtype} + super().__init__() + if vocab_size % pad_vocab_size_multiple != 0: + vocab_size += pad_vocab_size_multiple - ( + vocab_size % pad_vocab_size_multiple + ) + self.config = MambaConfig( + vocab_size=vocab_size, + d_model=d_model, + n_layer=n_layer, + pad_vocab_size_multiple=pad_vocab_size_multiple, + ) + self.backbone = MixerModel( + d_model=d_model, + n_layer=n_layer, + vocab_size=vocab_size, + initializer_cfg=initializer_cfg, + **backbone_kwargs, + **factory_kwargs, + ) + self.lm_head = nn.Linear(d_model, vocab_size, bias=False, **factory_kwargs) + + # Initialize weights and apply final processing + self.apply( + partial( + _init_weights, + n_layer=n_layer, + **(initializer_cfg if initializer_cfg is not None else {}), + ) + ) + self.tie_weights() + + def tie_weights(self): + self.lm_head.weight = self.backbone.embedding.weight + + def allocate_inference_cache(self, batch_size, max_seqlen, dtype=None, **kwargs): + return self.backbone.allocate_inference_cache( + batch_size, max_seqlen, dtype=dtype, **kwargs + ) + + def forward( + self, + input_ids, + position_ids=None, + inference_params=None, + num_last_tokens=0, + labels=None, + **kwargs, + ): + """ + "position_ids" is just to be compatible with Transformer generation. We don't use it. + num_last_tokens: if > 0, only return the logits for the last n tokens + """ + hidden_states = self.backbone(input_ids, inference_params=inference_params) + if num_last_tokens > 0: + hidden_states = hidden_states[:, -num_last_tokens:] + lm_logits = self.lm_head(hidden_states) + + CausalLMOutput = namedtuple("CausalLMOutput", ["logits"]) + return CausalLMOutput(logits=lm_logits) + + loss = None + if labels is not None: + logits = lm_logits + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + CausalLMOutput = namedtuple("CausalLMOutput", ["logits", "loss"]) + print(loss) + return CausalLMOutput(logits=lm_logits, loss=loss) + + else: + CausalLMOutput = namedtuple("CausalLMOutput", ["logits"]) + return CausalLMOutput(logits=lm_logits) + + def save_pretrained( + self, + save_directory: Union[str, os.PathLike], + state_dict: Optional[dict] = None, + safe_serialization: Optional[bool] = None, # pylint: disable=unused-argument + ): + if state_dict is None: + state_dict = self.state_dict() + torch.save(state_dict, os.path.join(save_directory, "pytorch_model.bin")) + + @classmethod + def from_pretrained(cls, pretrained_model_name, device=None, dtype=None, **kwargs): + config = load_config_hf(pretrained_model_name) + model = cls(**config, device=device, dtype=dtype, **kwargs) + model.load_state_dict( + load_state_dict_hf(pretrained_model_name, device={"": device}, dtype=dtype) + ) + return model diff --git a/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py new file mode 100644 index 0000000000000000000000000000000000000000..1275906804b0f6908ef94be9b94d4a83960bf148 --- /dev/null +++ b/src/axolotl/monkeypatch/btlm_attn_hijack_flash.py @@ -0,0 +1,66 @@ +""" +Flash attention monkey patch for cerebras btlm model +""" + +import importlib +import logging +from typing import Optional, Tuple + 
+import torch +from accelerate import init_empty_weights +from flash_attn.flash_attn_interface import flash_attn_func +from transformers import AutoConfig, AutoModelForCausalLM + +LOG = logging.getLogger("axolotl") + + +def replace_btlm_attn_with_flash_attn(model_name="cerebras/btlm-3b-8k-base"): + # this is a wonky hack to get the remotely loaded module + model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) + # we need to load the model here in order for modeling_btlm to be available + with init_empty_weights(): + AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) + module_name = model_config.__class__.__module__.replace( + ".configuration_btlm", ".modeling_btlm" + ) + modeling_btlm = importlib.import_module(module_name) + modeling_btlm.BTLMAttention._attn = ( # pylint: disable=protected-access + flashattn_attn + ) + + +def flashattn_attn( + self, + query: torch.Tensor, + key: Optional[torch.Tensor] = None, + value: Optional[torch.Tensor] = None, + attention_mask: Optional[torch.Tensor] = None, # pylint: disable=unused-argument + head_mask: Optional[torch.Tensor] = None, + position_bias: Optional[torch.Tensor] = None, # pylint: disable=unused-argument +) -> Tuple[torch.Tensor, Optional[torch.Tensor]]: + softmax_scale = ( + 1 / (key.size(-1) ** self.attn_scale_power) if self.scale_attn_weights else None + ) + + query = query.permute(0, 2, 1, 3) + key = key.permute(0, 2, 1, 3) + value = value.permute(0, 2, 1, 3) + + # Perform Flash attention + attn_output = flash_attn_func( + query, + key, + value, + dropout_p=0.0, # Assuming you have this attribute + softmax_scale=softmax_scale, # Set this if you have specific scaling in mind + causal=not self.is_cross_attention, # Assuming you have this attribute + return_attn_probs=False, # Set this based on your needs + ) + + # Optional: Apply head mask if it's not None + if head_mask is not None: + attn_output *= head_mask + + attn_output = attn_output.permute(0, 2, 1, 3) + + return attn_output, None # We don't have explicit attn_weights in Flash attention diff --git a/src/axolotl/monkeypatch/data/__init__.py b/src/axolotl/monkeypatch/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py b/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py new file mode 100644 index 0000000000000000000000000000000000000000..2e9364e3a5faa39a6084d259a4cb2d876a9f2e57 --- /dev/null +++ b/src/axolotl/monkeypatch/data/batch_dataset_fetcher.py @@ -0,0 +1,46 @@ +"""monkey patches for the dataset fetcher to handle batches of packed indexes""" +# pylint: disable=protected-access + +import torch +from torch.utils.data._utils.fetch import _BaseDatasetFetcher +from torch.utils.data._utils.worker import _worker_loop + + +class _MapDatasetFetcher(_BaseDatasetFetcher): + def fetch(self, possibly_batched_index): + if isinstance(possibly_batched_index[0], list): + data = [None for i in possibly_batched_index] + for i, possibly_batched_index_ in enumerate(possibly_batched_index): + if self.auto_collation: + if ( + hasattr(self.dataset, "__getitems__") + and self.dataset.__getitems__ + ): + data[i] = self.dataset.__getitems__(possibly_batched_index_) + else: + data[i] = [self.dataset[idx] for idx in possibly_batched_index_] + else: + data[i] = self.dataset[possibly_batched_index_] + else: + if self.auto_collation: + if hasattr(self.dataset, "__getitems__") and self.dataset.__getitems__: + data = 
self.dataset.__getitems__(possibly_batched_index) + else: + data = [self.dataset[idx] for idx in possibly_batched_index] + else: + data = self.dataset[possibly_batched_index] + return self.collate_fn(data) + + +def patch_fetchers(): + torch.utils.data._utils.fetch._MapDatasetFetcher = _MapDatasetFetcher + torch.utils.data.dataloader._utils.fetch._MapDatasetFetcher = _MapDatasetFetcher + + +def patched_worker_loop(*args, **kwargs): + patch_fetchers() + return _worker_loop(*args, **kwargs) + + +torch.utils.data._utils.worker._worker_loop = patched_worker_loop +patch_fetchers() diff --git a/src/axolotl/monkeypatch/fastchat_conversation_turns.py b/src/axolotl/monkeypatch/fastchat_conversation_turns.py new file mode 100644 index 0000000000000000000000000000000000000000..d09ab5075d8a779d4930190436052f2a2164c3b6 --- /dev/null +++ b/src/axolotl/monkeypatch/fastchat_conversation_turns.py @@ -0,0 +1,212 @@ +""" +monkeypatch to add a get_turns method +""" + +import logging +from typing import Generator, Tuple + +from fastchat.conversation import SeparatorStyle + +LOG = logging.getLogger("axolotl.monkeypatch.fastchat_conversation_turns") + + +def get_prompt(self) -> str: + ret = "" + for role, msg in self.get_turns(): + ret += role + msg + return ret + + +def get_turns( # pylint: disable=too-many-return-statements + self, +) -> Generator[Tuple[str, str], None, None]: + """Get the prompt for generation.""" + system_prompt = self.system_template.format(system_message=self.system_message) + if self.sep_style == SeparatorStyle.ADD_COLON_SINGLE: + yield "", system_prompt + self.sep + for role, message in self.messages: + if message: + yield role + ": ", message + self.sep + else: + yield role + ":", "" + return + if self.sep_style == SeparatorStyle.ADD_COLON_TWO: + seps = [self.sep, self.sep2] + yield "", system_prompt + seps[0] + for i, (role, message) in enumerate(self.messages): + if message: + yield role + ": ", message + seps[i % 2] + else: + yield role + ":", "" + return + if self.sep_style == SeparatorStyle.ADD_COLON_SPACE_SINGLE: + yield "", system_prompt + self.sep + for role, message in self.messages: + if message: + yield role + ": ", message + self.sep + else: + yield role + ": ", "" # must be end with a space + return + if self.sep_style == SeparatorStyle.ADD_NEW_LINE_SINGLE: + yield "", "" if system_prompt == "" else system_prompt + self.sep + for role, message in self.messages: + if message: + yield role + "\n", message + self.sep + else: + yield role + "\n", "" + return + if self.sep_style == SeparatorStyle.NO_COLON_SINGLE: + yield "", system_prompt + for role, message in self.messages: + if message: + yield role, message + self.sep + else: + yield role, "" + return + if self.sep_style == SeparatorStyle.NO_COLON_TWO: + seps = [self.sep, self.sep2] + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + if message: + yield role, message + seps[i % 2] + else: + yield role, "" + return + if self.sep_style == SeparatorStyle.RWKV: + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + if message: + yield role + ": ", message.replace("\r\n", "\n").replace( + "\n\n", "\n" + ) + "\n\n" + else: + yield role + ":", "" + return + if self.sep_style == SeparatorStyle.LLAMA2 and self.name != "mistral": + if self.system_message: + if self.messages: + # For llama, the system message is incorporated into the first human instruction + first_role, first_msg = self.messages[0] + if first_role == self.roles[0]: + system_prompt += first_msg + 
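+                    # (editor's note) the merged first turn is popped here so the
+                    # loop below does not emit it a second time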
self.messages.pop(0) + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + if message: + if (i % 2 == 0 and not self.system_message) or ( + i % 2 != 0 and self.system_message + ): + role = " " + role + yield role + " ", message + else: + yield role, "" + return + if self.sep_style == SeparatorStyle.LLAMA2 and self.name == "mistral": + contains_sys_msg = False + if self.system_message: + contains_sys_msg = True + if self.messages: + # There is no clear guidance on how to handle system messages in Mistral so we just prepend it to the first human instruction separated by a newline + first_role, first_msg = self.messages[0] + if first_role == self.roles[0]: + system_prompt = self.system_template.format( + system_message=" " + self.system_message + ) + system_prompt += first_msg + self.messages.pop(0) + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + if message and i == 0 and not contains_sys_msg: + yield "", system_prompt.strip() + " " + message # if there is no system message, we need to make sure there is the a ` [INST]` at the beginning of the first instruction. + elif message: + yield role + " ", message + else: + yield role, "" + return + if self.sep_style == SeparatorStyle.CHATGLM: + # source: https://huggingface.co/THUDM/chatglm-6b/blob/1d240ba371910e9282298d4592532d7f0f3e9f3e/modeling_chatglm.py#L1302-L1308 + # source2: https://huggingface.co/THUDM/chatglm2-6b/blob/e186c891cf64310ac66ef10a87e6635fa6c2a579/modeling_chatglm.py#L926 + round_add_n = 1 if self.name == "chatglm2" else 0 + if system_prompt: + yield "", system_prompt + self.sep + + for i, (role, message) in enumerate(self.messages): + if i % 2 == 0: + yield "", f"[Round {i//2 + round_add_n}]{self.sep}" + + if message: + yield f"{role}:", f"{message}{self.sep}" + else: + yield f"{role}:", "" + return + if self.sep_style == SeparatorStyle.CHATML: + yield "", "" if system_prompt == "" else system_prompt + self.sep + "\n" + for role, message in self.messages: + if message: + yield role + "\n", message + self.sep + "\n" + else: + yield role + "\n", "" + return + if self.sep_style == SeparatorStyle.CHATGLM3: + if self.system_message: + yield "", system_prompt + for role, message in self.messages: + if message: + yield role + "\n", " " + message + else: + yield role + return + if self.sep_style == SeparatorStyle.CHATINTERN: + # source: https://huggingface.co/internlm/internlm-chat-7b-8k/blob/bd546fa984b4b0b86958f56bf37f94aa75ab8831/modeling_internlm.py#L771 + seps = [self.sep, self.sep2] + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + prefix = "" if i % 2 == 0 else "" + if message: + yield prefix + role + ":", message + seps[i % 2] + "\n" + else: + yield role + ":", "" + return + if self.sep_style == SeparatorStyle.DOLLY: + seps = [self.sep, self.sep2] + yield "", system_prompt + for i, (role, message) in enumerate(self.messages): + if message: + suffix = "\n\n" if i % 2 == 1 else "" + yield role + ":\n", message + seps[i % 2] + suffix + else: + yield role + ":\n", "" + return + if self.sep_style == SeparatorStyle.PHOENIX: + yield "", system_prompt + for role, message in self.messages: + if message: + yield role + ": ", "" + message + "" + else: + yield role + ": " + "", "" + return + if self.sep_style == SeparatorStyle.ROBIN: + yield "", system_prompt + self.sep + for role, message in self.messages: + if message: + yield role + ":\n", message + self.sep + else: + yield role + ":\n", "" + return + if self.sep_style == 
SeparatorStyle.FALCON_CHAT: + if self.system_message: + yield "", system_prompt + self.sep + for role, message in self.messages: + if message: + yield role + ": ", message + self.sep + else: + yield role + ":", "" + else: + raise ValueError(f"Invalid style: {self.sep_style}") + + +def add_get_turns_to_conversation(): + import fastchat.conversation + + fastchat.conversation.Conversation.get_turns = get_turns + fastchat.conversation.Conversation.get_prompt = get_prompt diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py new file mode 100644 index 0000000000000000000000000000000000000000..f727c74b8297965b7691e6a18229106d21e37511 --- /dev/null +++ b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py @@ -0,0 +1,940 @@ +"""Flash attention monkey patch for llama model""" + +# copied from https://github.com/lm-sys/FastChat/blob/main/fastchat/train/llama_flash_attn_monkey_patch.py + +import logging +import warnings +from functools import partial +from typing import List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +import transformers +from einops import rearrange +from flash_attn.bert_padding import pad_input, unpad_input +from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.models.llama.modeling_llama import LlamaAttention +from transformers.models.llama.modeling_llama import ( + LlamaDecoderLayer as OriginalLlamaDecoderLayer, +) +from transformers.models.llama.modeling_llama import ( + LlamaMLP, + apply_rotary_pos_emb, + repeat_kv, +) +from xformers.ops import SwiGLU + +from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids, set_module_name + +try: + from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports + flash_attn_kvpacked_func, + flash_attn_varlen_kvpacked_func, + flash_attn_varlen_qkvpacked_func, + ) +except ImportError: + from flash_attn.flash_attn_interface import ( + flash_attn_unpadded_kvpacked_func as flash_attn_varlen_kvpacked_func, + ) + from flash_attn.flash_attn_interface import ( + flash_attn_unpadded_qkvpacked_func as flash_attn_varlen_qkvpacked_func, + ) + + +LOG = logging.getLogger("axolotl") + + +def is_xformers_swiglu_available() -> bool: + from xformers.ops.common import get_xformers_operator + + try: + get_xformers_operator("swiglu_packedw")() + return True + except RuntimeError as exc: + if "No such operator xformers::swiglu_packedw " in str(exc): + return False + return True + + +def replace_llama_mlp_with_swiglu(model): + for name, module in model.named_modules(): + if isinstance(module, LlamaMLP): + mlp = FusedMLP( + module.config, module.gate_proj, module.up_proj, module.down_proj + ) + set_module_name(model, name, mlp) + + +def replace_llama_qkv_with_fused(model): + for name, module in model.named_modules(): + if isinstance(module, LlamaAttention): + qkv = FusedAttention( + module.config, + module.q_proj, + module.k_proj, + module.v_proj, + module.o_proj, + ) + set_module_name(model, name, qkv) + + +def replace_llama_attn_with_flash_attn( + packed: Optional[bool] = False, + cross_entropy: Optional[bool] = False, + rms_norm: Optional[bool] = False, + use_shifted_sparse_attn: Optional[bool] = False, +): + transformers.models.llama.modeling_llama.LlamaModel._prepare_decoder_attention_mask = ( # pylint: disable=protected-access + _prepare_decoder_attention_mask + ) + if use_shifted_sparse_attn: + transformers.models.llama.modeling_llama.LlamaAttention.forward = ( + flashattn_forward_with_s2attn + ) + else: + 
transformers.models.llama.modeling_llama.LlamaAttention.forward = ( + flashattn_forward + ) + + if packed: + transformers.models.llama.modeling_llama.LlamaDecoderLayer = LlamaDecoderLayer + transformers.models.llama.modeling_llama.LlamaModel.forward = ( + llama_model_forward + ) + + # skip only if explicitly disabled + if cross_entropy: + try: + from flash_attn.losses.cross_entropy import CrossEntropyLoss + + LOG.info("patching with flash_attn.losses.cross_entropy") + transformers.models.llama.modeling_llama.CrossEntropyLoss = partial( + CrossEntropyLoss, inplace_backward=True + ) + except ImportError: + LOG.info( + "optimized flash-attention CrossEntropyLoss not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=xentropy_cuda_lib&subdirectory=csrc/xentropy'`)" + ) + + # skip only if explicitly disabled + if rms_norm: + try: + from flash_attn.ops.rms_norm import RMSNorm + + class LlamaRMSNorm(RMSNorm): + """Patched LLamaRMSNorm""" + + def __init__(self, hidden_size, eps=1e-6): + super().__init__(hidden_size, eps=eps) + + LOG.info("patching with flash_attn.ops.rms_norm") + transformers.models.llama.modeling_llama.LlamaRMSNorm = LlamaRMSNorm + except ImportError: + LOG.info( + "optimized flash-attention RMSNorm not found (run `pip install 'git+https://github.com/Dao-AILab/flash-attention.git#egg=dropout_layer_norm&subdirectory=csrc/layer_norm'`)" + ) + + +class FusedAttention(LlamaAttention): + """ + Fused QKV Attention layer for incrementally improved training efficiency + """ + + def __init__( + self, + config, + q: torch.nn.Linear, # pylint: disable=invalid-name + k: torch.nn.Linear, # pylint: disable=invalid-name + v: torch.nn.Linear, # pylint: disable=invalid-name + o: torch.nn.Linear, # pylint: disable=invalid-name + ): + super().__init__(config) + self.config = config + self.init_device = next(iter(q.state_dict().values())).device + + # define equivalent fused qkv projection + self.out_features: List[int] = [q.out_features, k.out_features, v.out_features] + self.qkv_proj = torch.nn.Linear( + q.in_features, sum(self.out_features), device=self.init_device, bias=False + ) + self.o_proj = o + + # overwrite initialized weights with pretrained weights + self.qkv_proj.weight.data = torch.cat( + (q.weight.data, k.weight.data, v.weight.data), dim=0 + ) + + def _post_training(self, model, name): + q_proj, k_proj, v_proj = torch.split( + self.qkv_proj.weight.data, self.out_features, dim=0 + ) + + new_attn = LlamaAttention(self.config) + new_attn.q_proj.weight.data = q_proj + new_attn.k_proj.weight.data = k_proj + new_attn.v_proj.weight.data = v_proj + new_attn.o_proj.weight.data = self.o_proj.weight.data + + set_module_name(model, name, new_attn) + + +class FusedMLP(torch.nn.Module): + """ + Fused MLP layer for incrementally improved training efficiency + """ + + def __init__( + self, + config, + gate_proj: torch.nn.Linear, + up_proj: torch.nn.Linear, + down_proj: torch.nn.Linear, + ): + super().__init__() + self.config = config + self.swiglu = SwiGLU( + in_features=config.hidden_size, + hidden_features=config.intermediate_size, + bias=False, + _pack_weights=True, + ) + # overwrite initialized weights with pretrained weights + self.swiglu.w12.weight.data = torch.cat( + (gate_proj.weight.data, up_proj.weight.data), dim=0 + ) + self.swiglu.w3.weight.data = down_proj.weight.data + + def _post_training(self, model, name): + w1, w2 = torch.split( # pylint: disable=invalid-name + self.swiglu.w12.weight.data, self.config.intermediate_size, dim=0 + ) + + # Assign the split 
weights back to the original layers + new_mlp = LlamaMLP(self.config) + new_mlp.gate_proj.weight.data = w1 + new_mlp.up_proj.weight.data = w2 + new_mlp.down_proj.weight.data = self.swiglu.w3.weight.data + + set_module_name(model, name, new_mlp) + + def forward(self, x: torch.Tensor) -> torch.Tensor: # pylint: disable=invalid-name + return self.swiglu(x) + + +# Disable the transformation of the attention mask in LlamaModel as the flash attention +# requires the attention mask to be the same as the key_padding_mask +def _prepare_decoder_attention_mask( + self, + attention_mask, + input_shape, + inputs_embeds, + past_key_values_length, +): # pylint: disable=unused-argument + # [bsz, seq_len] + return attention_mask + + +GROUP_SIZE_RATIO = 1 / 4 + + +def flashattn_forward_with_s2attn( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: bool = False, + use_cache: bool = False, + padding_mask: Optional[torch.LongTensor] = None, # pylint: disable=unused-argument + cu_seqlens: Optional[torch.Tensor] = None, # pylint: disable=unused-argument + max_seqlen: Optional[torch.Tensor] = None, # pylint: disable=unused-argument +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel + + From: https://github.com/dvlab-research/LongLoRA/blob/main/llama_attn_replace.py + + attention_mask: [bsz, q_len] + + `cu_seqlens` will be ignored if provided + `max_seqlen` will be ignored if provided + """ + if output_attentions: + warnings.warn( + "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead." + ) + + bsz, q_len, _ = hidden_states.size() + + query_states = ( + self.q_proj(hidden_states) + .view(bsz, q_len, self.num_heads, self.head_dim) + .transpose(1, 2) + ) + key_states = ( + self.k_proj(hidden_states) + .view(bsz, q_len, self.num_key_value_heads, self.head_dim) + .transpose(1, 2) + ) + value_states = ( + self.v_proj(hidden_states) + .view(bsz, q_len, self.num_key_value_heads, self.head_dim) + .transpose(1, 2) + ) + # [bsz, q_len, nh, hd] + # [bsz, nh, q_len, hd] + # pylint: disable=duplicate-code + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + cos, sin = self.rotary_emb( + value_states, seq_len=kv_seq_len, position_ids=position_ids + ) + query_states, key_states = apply_rotary_pos_emb( + query_states, key_states, cos, sin, position_ids + ) + + # Past Key value support + if past_key_value is not None: + # reuse k, v, self_attention + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + # Flash attention codes from + # https://github.com/HazyResearch/flash-attention/blob/main/flash_attn/flash_attention.py + + # transform the data into the format required by flash attention + qkv = torch.stack( + [query_states, key_states, value_states], dim=2 + ) # [bsz, nh, 3, q_len, hd] + qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd] + + # We have disabled _prepare_decoder_attention_mask in LlamaModel + # the attention_mask should be the same as the key_padding_mask + 
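+    # (editor's note) The block below implements LongLoRA-style shifted sparse
+    # attention (S2-Attn): the heads are split into two halves that are folded
+    # into the batch dimension (hence repeat(2, 1) on the padding mask); one
+    # half attends within fixed groups of group_size tokens, the other within
+    # groups shifted by group_size // 2, so information still flows across
+    # group boundaries while each flash-attn call stays local.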
+ key_padding_mask = attention_mask.repeat(2, 1) + nheads = qkv.shape[-2] + # shift + + group_size = int(q_len * GROUP_SIZE_RATIO) + if q_len % group_size > 0: + raise ValueError( + f"q_len {q_len} should be divisible by group size {group_size}." + ) + + qkv = ( + qkv.reshape(bsz, q_len, 3, 2, self.num_heads // 2, self.head_dim) + .permute(0, 3, 1, 2, 4, 5) + .reshape(bsz * 2, q_len, 3, self.num_heads // 2, self.head_dim) + ) + x = rearrange( # pylint: disable=invalid-name + qkv, "b s three h d -> b s (three h d)" + ) + x_unpad, indices, cu_q_lens, max_s = unpad_input(x, key_padding_mask) + cu_q_len_tmp = torch.arange( + 0, max_s, group_size, device=key_padding_mask.device, dtype=cu_q_lens.dtype + ) + cu_q_len_tmp = torch.stack([cu_q_len_tmp, cu_q_len_tmp + group_size // 2]).repeat( + bsz, 1 + ) + cu_q_lens[:-1].unsqueeze(-1) + cu_q_lens = torch.cat([cu_q_len_tmp, cu_q_lens[1:].unsqueeze(-1)], dim=-1).view(-1) + + x_unpad = rearrange( + x_unpad, "nnz (three h d) -> nnz three h d", three=3, h=nheads // 2 + ) + output_unpad = flash_attn_varlen_qkvpacked_func( + x_unpad, cu_q_lens, group_size, 0.0, softmax_scale=None, causal=True + ) + output = rearrange( + pad_input( + rearrange(output_unpad, "nnz h d -> nnz (h d)"), indices, bsz * 2, q_len + ), + "b s (h d) -> b s h d", + h=nheads // 2, + ) + output = ( + output.reshape(bsz, 2, q_len, nheads // 2, self.head_dim) + .transpose(1, 2) + .reshape(bsz, q_len, nheads, self.head_dim) + ) + return self.o_proj(rearrange(output, "b s h d -> b s (h d)")), None, past_key_value + + +def flashattn_forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.Tensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: bool = False, + use_cache: bool = False, + padding_mask: Optional[torch.LongTensor] = None, # pylint: disable=unused-argument + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: Optional[torch.Tensor] = None, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + """Input shape: Batch x Time x Channel + + attention_mask: [bsz, q_len] + """ + # pylint: disable=duplicate-code + bsz, q_len, _ = hidden_states.size() + + if not hasattr(self, "pretraining_tp"): + self.pretraining_tp = 1 + + if self.pretraining_tp > 1: + key_value_slicing = ( + self.num_key_value_heads * self.head_dim + ) // self.pretraining_tp + query_slices = self.q_proj.weight.split( + (self.num_heads * self.head_dim) // self.pretraining_tp, dim=0 + ) + key_slices = self.k_proj.weight.split(key_value_slicing, dim=0) + value_slices = self.v_proj.weight.split(key_value_slicing, dim=0) + + query_states = [ + F.linear(hidden_states, query_slices[i]) for i in range(self.pretraining_tp) + ] + query_states = torch.cat(query_states, dim=-1) + + key_states = [ + F.linear(hidden_states, key_slices[i]) for i in range(self.pretraining_tp) + ] + key_states = torch.cat(key_states, dim=-1) + + value_states = [ + F.linear(hidden_states, value_slices[i]) for i in range(self.pretraining_tp) + ] + value_states = torch.cat(value_states, dim=-1) + + else: + if isinstance(self, FusedAttention): + query_states, key_states, value_states = self.qkv_proj(hidden_states).split( + self.out_features, dim=-1 + ) + else: + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view( + bsz, q_len, self.num_heads, self.head_dim + ).transpose(1, 2) + key_states = 
key_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + value_states = value_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + # [bsz, q_len, nh, hd] + # [bsz, nh, q_len, hd] + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + + cos, sin = self.rotary_emb( + value_states, seq_len=kv_seq_len, position_ids=position_ids + ) + query_states, key_states = apply_rotary_pos_emb( + query_states, key_states, cos, sin, position_ids + ) + # [bsz, nh, t, hd] + + if past_key_value is not None: + # reuse k, v, self_attention + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + if output_attentions: + warnings.warn( + "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead." + ) + + # + # flash-attn v2 start + # + + if self.training: + # during training q,k,v always have same seqlen + assert key_states.shape == query_states.shape + is_causal = True + else: + # turn off FA causal mask after first inference autoregressive iteration + # only on first autoregressive step q,k,v have same seqlen + is_causal = key_states.shape == query_states.shape + + dropout_rate = 0.0 if not self.training else getattr(self, "attention_dropout", 0.0) + + if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1: + # special handling using sample packing + qkv = torch.stack( + [query_states, key_states, value_states], dim=2 + ) # [bsz, nh, 3, q_len, hd] + qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd] + qkv = rearrange(qkv, "b s ... -> (b s) ...") + + output = flash_attn_varlen_qkvpacked_func( + qkv, + cu_seqlens, + max_seqlen, + dropout_p=dropout_rate, + softmax_scale=None, + causal=True, + ) + output = rearrange(output, "(b s) ... 
-> b s ...", b=bsz) + elif query_states.shape == key_states.shape: + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + qkv_unpad, cu_seqlens_q, max_seqlen_q, _, output_pad_fn = generate_qkv( + query_states, + key_states, + value_states, + qkvpacked=True, + # We have disabled _prepare_decoder_attention_mask in LlamaModel + # the attention_mask should be the same as the key_padding_mask + key_padding_mask=attention_mask, + query_padding_mask=attention_mask[:, -query_states.size(1) :] + if attention_mask is not None + else None, + ) + output_unpad = flash_attn_varlen_qkvpacked_func( + qkv_unpad, + cu_seqlens_q, + max_seqlen_q, + dropout_p=dropout_rate, + softmax_scale=None, + causal=is_causal, + ) + output = output_pad_fn(output_unpad) + else: + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + if attention_mask is None or attention_mask.all().item(): + output = flash_attn_kvpacked_func( + query_states, + torch.stack([key_states, value_states], 2), + dropout_p=dropout_rate, + causal=is_causal, + ) + else: + ( # pylint: disable=unbalanced-tuple-unpacking + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + _, + _, + output_pad_fn, + ) = generate_qkv( + query_states, + key_states, + value_states, + kvpacked=True, + key_padding_mask=attention_mask, + query_padding_mask=attention_mask[:, -query_states.size(1) :] + if attention_mask is not None + else None, + ) + if q_unpad.dtype != kv_unpad.dtype: + kv_unpad = kv_unpad.to(q_unpad.dtype) + output_unpad = flash_attn_varlen_kvpacked_func( + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + dropout_p=dropout_rate, + softmax_scale=None, + causal=is_causal, + ) + output = output_pad_fn(output_unpad) + + attn_output = output + if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + attn_output = rearrange(attn_output, "b s h d -> b s (h d)") + + # + # flash-attn v2 end + # + + if self.pretraining_tp > 1: + attn_output = attn_output.split(self.hidden_size // self.pretraining_tp, dim=2) + o_proj_slices = self.o_proj.weight.split( + self.hidden_size // self.pretraining_tp, dim=1 + ) + attn_output = sum( + F.linear(attn_output[i], o_proj_slices[i]) + for i in range(self.pretraining_tp) + ) + else: + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value + + +# based on https://github.com/Dao-AILab/flash-attention/blob/364a5b/tests/test_flash_attn.py#L38 +def generate_qkv( + q, + k, + v, + query_padding_mask=None, + key_padding_mask=None, + kvpacked=False, + qkvpacked=False, +): # pylint: disable=invalid-name,unnecessary-lambda-assignment + """ + Arguments: + q: (batch_size, seqlen_q, nheads, d) + k: (batch_size, seqlen_k, nheads_k, d) + v: (batch_size, seqlen_k, nheads_k, d) + query_padding_mask: (batch_size, seqlen), bool + key_padding_mask: (batch_size, seqlen), bool + """ + assert not (kvpacked and qkvpacked) + batch_size, seqlen_q, nheads, d = q.shape + _, seqlen_k, nheads_k, _ = k.shape + assert k.shape == (batch_size, seqlen_k, nheads_k, d) + assert v.shape == (batch_size, seqlen_k, nheads_k, d) + + if query_padding_mask is not None: + q_unpad, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input( + q, query_padding_mask + ) + + 
output_pad_fn = lambda output_unpad: pad_input( # noqa: E731 + output_unpad, indices_q, batch_size, seqlen_q + ) + + else: + q_unpad = rearrange(q, "b s h d -> (b s) h d") + cu_seqlens_q = torch.arange( + 0, + (batch_size + 1) * seqlen_q, + step=seqlen_q, + dtype=torch.int32, + device=q_unpad.device, + ) + max_seqlen_q = seqlen_q + + output_pad_fn = lambda output_unpad: rearrange( # noqa: E731 + output_unpad, "(b s) h d -> b s h d", b=batch_size + ) + + if key_padding_mask is not None: + k_unpad, _, cu_seqlens_k, max_seqlen_k = unpad_input(k, key_padding_mask) + v_unpad, _, _, _ = unpad_input(v, key_padding_mask) + else: + k_unpad = rearrange(k, "b s h d -> (b s) h d") + v_unpad = rearrange(v, "b s h d -> (b s) h d") + cu_seqlens_k = torch.arange( + 0, + (batch_size + 1) * seqlen_k, + step=seqlen_k, + dtype=torch.int32, + device=k_unpad.device, + ) + max_seqlen_k = seqlen_k + + if qkvpacked: + assert nheads == nheads_k + qkv_unpad = torch.stack([q_unpad, k_unpad, v_unpad], dim=1) + qkv = torch.stack([q, k, v], dim=2) + return (qkv_unpad, cu_seqlens_q, max_seqlen_q, qkv, output_pad_fn) + + if kvpacked: + kv_unpad = torch.stack([k_unpad, v_unpad], dim=1) + kv = torch.stack([k, v], dim=2) + return ( + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + q, + kv, + output_pad_fn, + ) + + return ( + q_unpad, + k_unpad, + v_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + q, + k, + v, + output_pad_fn, + ) + + +def llama_model_forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + cache_position: Optional[ # pylint: disable=unused-argument + torch.LongTensor + ] = None, +) -> Union[Tuple, BaseModelOutputWithPast]: + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time" + ) + if input_ids is not None: + batch_size, seq_length = input_ids.shape + elif inputs_embeds is not None: + batch_size, seq_length, _ = inputs_embeds.shape + else: + raise ValueError( + "You have to specify either decoder_input_ids or decoder_inputs_embeds" + ) + + seq_length_with_past = seq_length + past_key_values_length = 0 + + if past_key_values is not None: + past_key_values_length = past_key_values[0][0].shape[2] + seq_length_with_past = seq_length_with_past + past_key_values_length + + cu_seqlens = None + max_seqlen = None + if position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=device, + ) + position_ids = position_ids.unsqueeze(0).view(-1, 
seq_length) + else: + position_ids = position_ids.view(-1, seq_length).long() + cu_seqlens, max_seqlen = get_cu_seqlens_from_pos_ids(position_ids) + cu_seqlens = cu_seqlens.squeeze() + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + # embed positions + if attention_mask is None: + attention_mask = torch.ones( + (batch_size, seq_length_with_past), + dtype=torch.bool, + device=inputs_embeds.device, + ) + padding_mask = None + else: + if 0 in attention_mask: + padding_mask = attention_mask + else: + padding_mask = None + + attention_mask = ( + self._prepare_decoder_attention_mask( # pylint: disable=protected-access + attention_mask, + (batch_size, seq_length), + inputs_embeds, + past_key_values_length, + ) + ) + + hidden_states = inputs_embeds + + if self.gradient_checkpointing and self.training: + if use_cache: + transformers.logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + ) + use_cache = False + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + for idx, decoder_layer in enumerate(self.layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module( + *inputs, + ) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + position_ids, + past_key_value, + output_attentions, + None, + padding_mask, + cu_seqlens, + max_seqlen, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + padding_mask=padding_mask, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] + if v is not None + ) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) + + +class LlamaDecoderLayer(OriginalLlamaDecoderLayer): + """ + patched version of LlamaDecoderLayer to pass through the precalculated cu_seqlens + """ + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + padding_mask: Optional[torch.LongTensor] = None, + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: Optional[torch.Tensor] = None, + ) -> Tuple[ + torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] + ]: + """ + 
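+ Identical to the stock `LlamaDecoderLayer.forward`, except that the precalculated
+ `cu_seqlens`/`max_seqlen` are passed through to the patched self-attention so that
+ packed samples can be attended to independently.
+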
Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + cu_seqlens (`torch.Tensor`, *optional*) cumulative sequence len when packing + """ + + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + padding_mask=padding_mask, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py b/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py new file mode 100644 index 0000000000000000000000000000000000000000..8143750f0050184609ea61711c35bdf33dcbe59a --- /dev/null +++ b/src/axolotl/monkeypatch/llama_attn_hijack_xformers.py @@ -0,0 +1,157 @@ +""" +Directly copied the code from https://raw.githubusercontent.com/oobabooga/text-generation-webui/main/modules/llama_attn_hijack.py and made some adjustments +""" + +import logging +import warnings +from typing import Optional, Tuple + +import torch +import torch.nn.functional as F +import transformers.models.llama.modeling_llama +from transformers.models.llama.modeling_llama import apply_rotary_pos_emb, repeat_kv + +try: + import xformers.ops +except ImportError: + logging.error("xformers not found! 
Please install it before trying to use it.") + + +def hijack_llama_attention(): + transformers.models.llama.modeling_llama.LlamaAttention.forward = xformers_forward + + +def xformers_forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: bool = False, + use_cache: bool = False, + padding_mask: Optional[torch.LongTensor] = None, # pylint: disable=unused-argument + **kwargs, # pylint: disable=unused-argument +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + # pylint: disable=duplicate-code + bsz, q_len, _ = hidden_states.size() + + if not hasattr(self, "pretraining_tp"): + self.pretraining_tp = 1 + + if self.pretraining_tp > 1: + key_value_slicing = ( + self.num_key_value_heads * self.head_dim + ) // self.pretraining_tp + query_slices = self.q_proj.weight.split( + (self.num_heads * self.head_dim) // self.pretraining_tp, dim=0 + ) + key_slices = self.k_proj.weight.split(key_value_slicing, dim=0) + value_slices = self.v_proj.weight.split(key_value_slicing, dim=0) + + query_states = [ + F.linear(hidden_states, query_slices[i]) for i in range(self.pretraining_tp) + ] + query_states = torch.cat(query_states, dim=-1) + + key_states = [ + F.linear(hidden_states, key_slices[i]) for i in range(self.pretraining_tp) + ] + key_states = torch.cat(key_states, dim=-1) + + value_states = [ + F.linear(hidden_states, value_slices[i]) for i in range(self.pretraining_tp) + ] + value_states = torch.cat(value_states, dim=-1) + + else: + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view( + bsz, q_len, self.num_heads, self.head_dim + ).transpose(1, 2) + key_states = key_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + value_states = value_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + # [bsz, q_len, nh, hd] + # [bsz, nh, q_len, hd] + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + query_states, key_states = apply_rotary_pos_emb( + query_states, key_states, cos, sin, position_ids + ) + # [bsz, nh, t, hd] + + if past_key_value is not None: + # reuse k, v, self_attention + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + if output_attentions: + warnings.warn( + "Output attentions is not supported for patched `LlamaAttention`, returning `None` instead." + ) + + # + # xformers-attn start + # + + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + # This is a nasty hack. We know attention_mask in transformers is either LowerTriangular or all Zeros. + # We therefore check if one element in the upper triangular portion is zero. If it is, then the mask is all zeros. 
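+ # In a LowerTriangular (causal) mask the entry at [0, 0, 0, 1] is a large negative value
+ # (query 0 must not see key 1), so a 0 there means the mask is all zeros and no attn_bias
+ # needs to be passed to xformers.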
+ if attention_mask is None or attention_mask[0, 0, 0, 1] == 0: + # input and output should be of form (bsz, q_len, num_heads, head_dim) + attn_output = xformers.ops.memory_efficient_attention( + query_states, key_states, value_states, attn_bias=None + ) + else: + # input and output should be of form (bsz, q_len, num_heads, head_dim) + attn_output = xformers.ops.memory_efficient_attention( + query_states, + key_states, + value_states, + # attn_bias=attention_mask, + attn_bias=xformers.ops.LowerTriangularMask(), + ) + + if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + + # + # xformers-attn end + # + + if self.pretraining_tp > 1: + attn_output = attn_output.split(self.hidden_size // self.pretraining_tp, dim=2) + o_proj_slices = self.o_proj.weight.split( + self.hidden_size // self.pretraining_tp, dim=1 + ) + attn_output = sum( + F.linear(attn_output[i], o_proj_slices[i]) + for i in range(self.pretraining_tp) + ) + else: + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value diff --git a/src/axolotl/monkeypatch/llama_expand_mask.py b/src/axolotl/monkeypatch/llama_expand_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..5738bb543cf53d64e81ad61fee3e08390fc08c39 --- /dev/null +++ b/src/axolotl/monkeypatch/llama_expand_mask.py @@ -0,0 +1,25 @@ +""" +expands the binary attention mask per 3.2.2 of https://arxiv.org/pdf/2107.02027.pdf +""" +from typing import Optional + +import torch + +from axolotl.monkeypatch.utils import mask_2d_to_4d + + +def _expand_mask(mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None): + masked_zero_one_mask = mask_2d_to_4d(mask, dtype, tgt_len) + inverted_mask = 1.0 - masked_zero_one_mask + + return inverted_mask.masked_fill( + inverted_mask.to(torch.bool), torch.finfo(dtype).min + ) + + +def hijack_expand_mask(): + import transformers + + transformers.models.llama.modeling_llama._expand_mask = ( # pylint: disable=protected-access + _expand_mask + ) diff --git a/src/axolotl/monkeypatch/llama_patch_multipack.py b/src/axolotl/monkeypatch/llama_patch_multipack.py new file mode 100644 index 0000000000000000000000000000000000000000..540c5577a0396788c6a824aa0ebf5af36aa79dd7 --- /dev/null +++ b/src/axolotl/monkeypatch/llama_patch_multipack.py @@ -0,0 +1,26 @@ +""" +Patched LlamaAttention to use torch.nn.functional.scaled_dot_product_attention +""" + +from axolotl.monkeypatch.utils import ( + patched_prepare_4d_causal_attention_mask, + patched_prepare_4d_causal_attention_mask_for_sdpa, +) + + +def hijack_llama_prepare_4d_mask(): + import transformers.modeling_attn_mask_utils + import transformers.models.llama.modeling_llama + + transformers.models.llama.modeling_llama._prepare_4d_causal_attention_mask_for_sdpa = ( # pylint: disable=protected-access + patched_prepare_4d_causal_attention_mask_for_sdpa + ) + transformers.modeling_attn_mask_utils._prepare_4d_causal_attention_mask_for_sdpa = ( # pylint: disable=protected-access + patched_prepare_4d_causal_attention_mask_for_sdpa + ) + transformers.models.llama.modeling_llama._prepare_4d_causal_attention_mask = ( # pylint: disable=protected-access + patched_prepare_4d_causal_attention_mask + ) + transformers.modeling_attn_mask_utils._prepare_4d_causal_attention_mask = ( # pylint: disable=protected-access + 
patched_prepare_4d_causal_attention_mask + ) diff --git a/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py new file mode 100644 index 0000000000000000000000000000000000000000..8e43da1110e6cc2d51211d33e5a260c2ab4e4a9e --- /dev/null +++ b/src/axolotl/monkeypatch/mistral_attn_hijack_flash.py @@ -0,0 +1,643 @@ +"""Flash attention monkey patch for mistral model""" +# pylint: disable=duplicate-code + +import logging +from typing import List, Optional, Tuple, Union + +import torch +import transformers +from einops import rearrange +from flash_attn.bert_padding import pad_input, unpad_input +from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports + flash_attn_kvpacked_func, + flash_attn_varlen_kvpacked_func, + flash_attn_varlen_qkvpacked_func, +) +from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.models.mistral.modeling_mistral import ( + MistralAttention as OriginalMistralAttention, +) +from transformers.models.mistral.modeling_mistral import ( + MistralDecoderLayer as OriginalMistralDecoderLayer, +) +from transformers.models.mistral.modeling_mistral import apply_rotary_pos_emb, repeat_kv + +from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids + +LOG = logging.getLogger("axolotl.monkeypatch.mistral") + + +def replace_mistral_attn_with_flash_attn( + packed: Optional[bool] = False, +): + transformers.models.mistral.modeling_mistral.MistralModel._prepare_decoder_attention_mask = ( # pylint: disable=protected-access + _prepare_decoder_attention_mask + ) + transformers.models.mistral.modeling_mistral.MistralAttention.forward = ( + flashattn_forward + ) + if packed: + transformers.models.mistral.modeling_mistral.MistralDecoderLayer = ( + MistralDecoderLayer + ) + transformers.models.mistral.modeling_mistral.MistralModel.forward = ( + mistral_model_forward + ) + + +@torch.jit.script +def _make_sliding_window_causal_mask( + bsz: int, + tgt_len: int, + dtype: torch.dtype, + device: torch.device, + past_key_values_length: int = 0, + sliding_window: int = 4096, +): + """ + Make causal mask used for sliding window attention + """ + tensor = torch.full( + (tgt_len, tgt_len), + fill_value=1, + device=device, + ) + mask = torch.tril(tensor, diagonal=0) + # make the mask banded to account for sliding window + # NOTE: HF implementation is wrong as of 14-10-2023 for torch.triu, needs +1 + mask = torch.triu(mask, diagonal=-sliding_window + 1) + mask = torch.log(mask).to(dtype) + + if past_key_values_length > 0: + mask = torch.cat( + [ + torch.zeros( + tgt_len, past_key_values_length, dtype=dtype, device=device + ), + mask, + ], + dim=-1, + ) + return mask[None, None, :, :].expand( + bsz, 1, tgt_len, tgt_len + past_key_values_length + ) + + +# Disable the transformation of the attention mask in LlamaModel as the flash attention +# requires the attention mask to be the same as the key_padding_mask +def _prepare_decoder_attention_mask( + self, + attention_mask, + input_shape, + inputs_embeds, + past_key_values_length, + sliding_window, +): # pylint: disable=unused-argument + # [bsz, seq_len] + if attention_mask is None or sliding_window is None: + return attention_mask + + # NOTE: attention mask and sliding masks are only broadcastable in certain scenarios. + # Without attention_mask.shape[0] == 1, error will trigger after eval loss but only when wandb is enabled. 
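+ # Only fold the banded sliding-window mask into the additive attention mask for a
+ # multi-token (prefill) forward pass whose incoming mask has a broadcastable batch
+ # dimension of 1; otherwise leave the attention mask untouched and log that it was skipped.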
+ if input_shape[-1] > 1 and attention_mask.shape[0] == 1: + sliding_window_mask = _make_sliding_window_causal_mask( + bsz=input_shape[0], + tgt_len=input_shape[1], + dtype=inputs_embeds.dtype, + device=inputs_embeds.device, + past_key_values_length=past_key_values_length, + sliding_window=sliding_window, + ) + attention_mask = attention_mask + sliding_window_mask + else: + LOG.info("skipping sliding window mask, not broadcastable with attention mask") + + return attention_mask + + +def flashattn_forward( + self: OriginalMistralAttention, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: bool = False, + use_cache: bool = False, + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: Optional[torch.Tensor] = None, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view( + bsz, q_len, self.num_heads, self.head_dim + ).transpose(1, 2) + key_states = key_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + value_states = value_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + query_states, key_states = apply_rotary_pos_emb( + query_states, key_states, cos, sin, position_ids + ) + + use_sliding_windows = ( + getattr(self.config, "sliding_window") is not None + and kv_seq_len > self.config.sliding_window + ) + + if use_sliding_windows: + window_size = (self.config.sliding_window, self.config.sliding_window) + else: + window_size = (-1, -1) + + if past_key_value is not None: + # Activate slicing cache only if the config has a value `sliding_windows` attribute + if ( + hasattr(self.config, "sliding_window") + and kv_seq_len > self.config.sliding_window + ): + slicing_tokens = kv_seq_len - self.config.sliding_window + + past_key = past_key_value[0] + past_value = past_key_value[1] + + past_key = past_key[:, :, slicing_tokens:, :].contiguous() + past_value = past_value[:, :, slicing_tokens:, :].contiguous() + + if past_key.shape[-2] != self.config.sliding_window - 1: + raise ValueError( + f"past key much have a shape of (`batch_size, num_heads, self.config.sliding_window-1, head_dim`), got" + f" {past_key.shape}" + ) + + past_key_value = (past_key, past_value) if use_cache else None + + if past_key_value is not None: + key_states = torch.cat([past_key_value[0], key_states], dim=2) + value_states = torch.cat([past_key_value[1], value_states], dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + if self.training: + # during training q,k,v always have same seqlen + assert key_states.shape == query_states.shape + is_causal = True + else: + # turn off FA causal mask after first inference autoregressive iteration + # only on first autoregressive step q,k,v have same seqlen + is_causal = key_states.shape == query_states.shape + + dropout_rate = 0.0 if not 
self.training else getattr(self, "attention_dropout", 0.0) + + if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1: + # special handling using sample packing + qkv = torch.stack( + [query_states, key_states, value_states], dim=2 + ) # [bsz, nh, 3, q_len, hd] + qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd] + qkv = rearrange(qkv, "b s ... -> (b s) ...") + + output = flash_attn_varlen_qkvpacked_func( + qkv, + cu_seqlens, + max_seqlen, + dropout_p=dropout_rate, + softmax_scale=None, + causal=True, + window_size=window_size, + ) + output = rearrange(output, "(b s) ... -> b s ...", b=bsz) + elif query_states.shape == key_states.shape: + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + qkv_unpad, cu_seqlens_q, max_seqlen_q, _, output_pad_fn = generate_qkv( + query_states, + key_states, + value_states, + qkvpacked=True, + # We have disabled _prepare_decoder_attention_mask in LlamaModel + # the attention_mask should be the same as the key_padding_mask + key_padding_mask=attention_mask, + query_padding_mask=attention_mask[:, -query_states.size(1) :] + if attention_mask is not None + else None, + ) + output_unpad = flash_attn_varlen_qkvpacked_func( + qkv_unpad, + cu_seqlens_q, + max_seqlen_q, + dropout_p=dropout_rate, + softmax_scale=None, + causal=is_causal, + window_size=window_size, + ) + output = output_pad_fn(output_unpad) + else: + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + if attention_mask is None or attention_mask.all().item(): + output = flash_attn_kvpacked_func( + query_states, + torch.stack([key_states, value_states], 2), + dropout_p=dropout_rate, + causal=is_causal, + window_size=window_size, + ) + else: + ( # pylint: disable=unbalanced-tuple-unpacking + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + _, + _, + output_pad_fn, + ) = generate_qkv( + query_states, + key_states, + value_states, + kvpacked=True, + key_padding_mask=attention_mask, + query_padding_mask=attention_mask[:, -query_states.size(1) :] + if attention_mask is not None + else None, + ) + if q_unpad.dtype != kv_unpad.dtype: + kv_unpad = kv_unpad.to(q_unpad.dtype) + output_unpad = flash_attn_varlen_kvpacked_func( + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + dropout_p=dropout_rate, + softmax_scale=None, + causal=is_causal, + window_size=window_size, + ) + output = output_pad_fn(output_unpad) + + attn_output = output + if attn_output.size() != (bsz, q_len, self.num_heads, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, q_len, self.num_heads, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + attn_output = rearrange(attn_output, "b s h d -> b s (h d)") + + attn_output = self.o_proj(attn_output) + + if not output_attentions: + attn_weights = None + + return attn_output, attn_weights, past_key_value + + +# based on https://github.com/Dao-AILab/flash-attention/blob/364a5b/tests/test_flash_attn.py#L38 +def generate_qkv( + q, + k, + v, + query_padding_mask=None, + key_padding_mask=None, + kvpacked=False, + qkvpacked=False, +): # pylint: disable=invalid-name,unnecessary-lambda-assignment + """ + Arguments: + q: (batch_size, seqlen_q, nheads, d) + k: (batch_size, seqlen_k, nheads_k, d) + v: (batch_size, seqlen_k, nheads_k, d) + query_padding_mask: (batch_size, seqlen), bool + key_padding_mask: (batch_size, 
seqlen), bool + """ + assert not (kvpacked and qkvpacked) + batch_size, seqlen_q, nheads, d = q.shape + _, seqlen_k, nheads_k, _ = k.shape + assert k.shape == (batch_size, seqlen_k, nheads_k, d) + assert v.shape == (batch_size, seqlen_k, nheads_k, d) + + if query_padding_mask is not None: + q_unpad, indices_q, cu_seqlens_q, max_seqlen_q = unpad_input( + q, query_padding_mask + ) + + output_pad_fn = lambda output_unpad: pad_input( # noqa: E731 + output_unpad, indices_q, batch_size, seqlen_q + ) + + else: + q_unpad = rearrange(q, "b s h d -> (b s) h d") + cu_seqlens_q = torch.arange( + 0, + (batch_size + 1) * seqlen_q, + step=seqlen_q, + dtype=torch.int32, + device=q_unpad.device, + ) + max_seqlen_q = seqlen_q + + output_pad_fn = lambda output_unpad: rearrange( # noqa: E731 + output_unpad, "(b s) h d -> b s h d", b=batch_size + ) + + if key_padding_mask is not None: + k_unpad, _, cu_seqlens_k, max_seqlen_k = unpad_input(k, key_padding_mask) + v_unpad, _, _, _ = unpad_input(v, key_padding_mask) + else: + k_unpad = rearrange(k, "b s h d -> (b s) h d") + v_unpad = rearrange(v, "b s h d -> (b s) h d") + cu_seqlens_k = torch.arange( + 0, + (batch_size + 1) * seqlen_k, + step=seqlen_k, + dtype=torch.int32, + device=k_unpad.device, + ) + max_seqlen_k = seqlen_k + + if qkvpacked: + assert nheads == nheads_k + qkv_unpad = torch.stack([q_unpad, k_unpad, v_unpad], dim=1) + qkv = torch.stack([q, k, v], dim=2) + return (qkv_unpad, cu_seqlens_q, max_seqlen_q, qkv, output_pad_fn) + + if kvpacked: + kv_unpad = torch.stack([k_unpad, v_unpad], dim=1) + kv = torch.stack([k, v], dim=2) + return ( + q_unpad, + kv_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + q, + kv, + output_pad_fn, + ) + + return ( + q_unpad, + k_unpad, + v_unpad, + cu_seqlens_q, + cu_seqlens_k, + max_seqlen_q, + max_seqlen_k, + q, + k, + v, + output_pad_fn, + ) + + +def mistral_model_forward( + self, + input_ids: torch.LongTensor = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[List[torch.FloatTensor]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, +) -> Union[Tuple, BaseModelOutputWithPast]: + output_attentions = ( + output_attentions + if output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time" + ) + if input_ids is not None: + batch_size, seq_length = input_ids.shape + elif inputs_embeds is not None: + batch_size, seq_length, _ = inputs_embeds.shape + else: + raise ValueError( + "You have to specify either decoder_input_ids or decoder_inputs_embeds" + ) + + seq_length_with_past = seq_length + past_key_values_length = 0 + + if past_key_values is not None: + past_key_values_length = past_key_values[0][0].shape[2] + seq_length_with_past = seq_length_with_past + past_key_values_length + + cu_seqlens = None + max_seqlen = None + if 
position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=device, + ) + position_ids = position_ids.unsqueeze(0).view(-1, seq_length) + else: + position_ids = position_ids.view(-1, seq_length).long() + cu_seqlens, max_seqlen = get_cu_seqlens_from_pos_ids(position_ids) + cu_seqlens = cu_seqlens.squeeze() + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + # embed positions + if attention_mask is None: + attention_mask = torch.ones( + (batch_size, seq_length_with_past), + dtype=torch.bool, + device=inputs_embeds.device, + ) + attention_mask = ( + self._prepare_decoder_attention_mask( # pylint: disable=protected-access + attention_mask, + (batch_size, seq_length), + inputs_embeds, + past_key_values_length, + sliding_window=self.config.sliding_window, + ) + ) + + hidden_states = inputs_embeds + + if self.gradient_checkpointing and self.training: + if use_cache: + transformers.logger.warning_once( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." + ) + use_cache = False + + # decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + for idx, decoder_layer in enumerate(self.layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + position_ids, + past_key_value, + output_attentions, + None, + cu_seqlens, + max_seqlen, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + hidden_states = self.norm(hidden_states) + + # add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] + if v is not None + ) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) + + +class MistralDecoderLayer(OriginalMistralDecoderLayer): + """ + patched version of MistralDecoderLayer to pass through the precalculated cu_seqlens + """ + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: 
Optional[torch.Tensor] = None, + ) -> Tuple[ + torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]] + ]: + """ + Args: + hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` + attention_mask (`torch.FloatTensor`, *optional*): attention mask of size + `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under + returned tensors for more detail. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding + (see `past_key_values`). + past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states + cu_seqlens (`torch.Tensor`, *optional*) cumulative sequence len when packing + """ + + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs diff --git a/src/axolotl/monkeypatch/mixtral/__init__.py b/src/axolotl/monkeypatch/mixtral/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..d6ee0ce16b72191ff3cc83066252bfa73af3801a --- /dev/null +++ b/src/axolotl/monkeypatch/mixtral/__init__.py @@ -0,0 +1,50 @@ +""" +Patches to support multipack for mixtral +""" +import torch + + +def patch_mixtral_moe_forward_zero3() -> None: + import torch.nn.functional as F + + def mlp_forward(self, hidden_states): + current_hidden_states = self.act_fn(self.w1(hidden_states)) * self.w3( + hidden_states + ) + current_hidden_states = self.w2(current_hidden_states) + return current_hidden_states + + # Ref. 
https://huggingface.co/deepseek-ai/deepseek-moe-16b-base/blob/main/modeling_deepseek.py + def moe_forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + batch_size, sequence_length, hidden_dim = hidden_states.shape + hidden_states = hidden_states.view(-1, hidden_dim) + # router_logits: (batch * sequence_length, n_experts) + router_logits = self.gate(hidden_states) + + routing_weights = F.softmax(router_logits, dim=1, dtype=torch.float) + topk_weight, topk_idx = torch.topk( + routing_weights, self.top_k, dim=-1, sorted=False + ) + topk_weight /= topk_weight.sum(dim=-1, keepdim=True) + # we cast back to the input dtype + topk_weight = topk_weight.to(hidden_states.dtype) + + hidden_states = hidden_states.repeat_interleave(self.top_k, dim=0) + y = torch.empty_like(hidden_states) # pylint: disable=invalid-name + flat_topk_idx = topk_idx.view(-1) + for i in range(self.num_experts): + expert = self.experts[i] + y[flat_topk_idx == i] = expert(hidden_states[flat_topk_idx == i]) + y = ( # pylint: disable=invalid-name + y.view(*topk_weight.shape, -1) * topk_weight.unsqueeze(-1) + ).sum(dim=1) + final_hidden_states = y.reshape(batch_size, sequence_length, hidden_dim) + return final_hidden_states, router_logits + + from transformers.models.mixtral.modeling_mixtral import ( + MixtralBLockSparseTop2MLP, + MixtralSparseMoeBlock, + ) + + MixtralBLockSparseTop2MLP.forward = mlp_forward + MixtralSparseMoeBlock.forward = moe_forward diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py new file mode 100644 index 0000000000000000000000000000000000000000..fbcaf7a668502e34ca06d22b4e6cac6d747a5a63 --- /dev/null +++ b/src/axolotl/monkeypatch/multipack.py @@ -0,0 +1,61 @@ +"""multipack patching for v2 of sample packing""" +import importlib + +import transformers +from accelerate import init_empty_weights +from transformers import AutoConfig, AutoModelForCausalLM +from transformers.integrations import is_deepspeed_zero3_enabled + +from axolotl.monkeypatch.mixtral import patch_mixtral_moe_forward_zero3 +from axolotl.monkeypatch.utils import get_unpad_data + +SUPPORTED_MULTIPACK_MODEL_TYPES = [ + "mixtral", + "qwen2", + "falcon", + "phi", + "gemma", + "gemmoe", + "starcoder2", +] + + +def patch_for_multipack(model_type, model_name=None): + if model_type == "mixtral": + transformers.models.mixtral.modeling_mixtral._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + if is_deepspeed_zero3_enabled(): + patch_mixtral_moe_forward_zero3() + elif model_type == "qwen2": + transformers.models.qwen2.modeling_qwen2._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + elif model_type == "falcon": + transformers.models.falcon.modeling_falcon._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + elif model_type == "phi": + transformers.models.phi.modeling_phi._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + elif model_type == "gemma": + transformers.models.gemma.modeling_gemma._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + elif model_type == "starcoder2": + transformers.models.starcoder2.modeling_starcoder2._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) + elif model_type == "gemmoe": + model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) + # we need to load the model here in order for modeling_gemmoe to be available + with init_empty_weights(): + AutoModelForCausalLM.from_pretrained(model_name, 
trust_remote_code=True) + module_name = model_config.__class__.__module__.replace( + ".configuration_gemmoe", ".modeling_gemmoe" + ) + modeling_gemmoe = importlib.import_module(module_name) + modeling_gemmoe._get_unpad_data = ( # pylint: disable=protected-access + get_unpad_data + ) diff --git a/src/axolotl/monkeypatch/relora.py b/src/axolotl/monkeypatch/relora.py new file mode 100644 index 0000000000000000000000000000000000000000..e4352cbe3d5806acee50b85571d8db7d7610a697 --- /dev/null +++ b/src/axolotl/monkeypatch/relora.py @@ -0,0 +1,469 @@ +"""Implements the ReLoRA training procedure from https://arxiv.org/abs/2307.05695, minus the initial full fine-tune.""" +import glob +import json +import logging +import os.path +import shutil +from functools import partial +from pathlib import Path +from typing import Dict, List, Sequence, Union + +import bitsandbytes as bnb +import peft +import safetensors.torch as st +import torch +from huggingface_hub import snapshot_download +from torch.distributed.optim import ZeroRedundancyOptimizer +from torch.optim.lr_scheduler import LRScheduler +from torch.optim.optimizer import Optimizer +from transformers import ( + TrainerCallback, + TrainerControl, + TrainerState, + TrainingArguments, +) +from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR + +from axolotl.utils.dict import DictDefault +from axolotl.utils.distributed import barrier, is_main_process + +LOG = logging.getLogger("axolotl.relora") + + +@torch.no_grad() +def magnitude_pruning_(tensor, prune_ratio): + tensor_magnitude = torch.abs(tensor) + threshold = torch.quantile( + tensor_magnitude.flatten().to(dtype=torch.float32), prune_ratio + ).to(dtype=tensor.dtype) + + mask = tensor_magnitude > threshold + tensor.mul_(mask.to(dtype=tensor.dtype)) + + +def reset_optimizer( + optimizer: torch.optim.Optimizer, + *, + reset_params: list[str], # where str is the key to a torch.nn.Parameter + optimizer_state_keys: list[str], + prune_ratio: float = 0.9, +): + pruning_fn = partial(magnitude_pruning_, prune_ratio=prune_ratio) + n_zeros = 0 + n_total = 0 + + optimizer_state = optimizer.state + if isinstance(optimizer, ZeroRedundancyOptimizer): + optimizer_state = optimizer.optim.state + + for param in reset_params: + param_state = optimizer_state[param] + if len(param_state) == 0: # no state for this param, happens for ZeRo optimizer + continue + for key in optimizer_state_keys: + pruning_fn( + param_state[key] + ) # pruning fn has to be inplace to keep the same keys in the dict + n_total += param_state[key].numel() + n_zeros += torch.sum(param_state[key] == 0).item() + + _zeroed = n_zeros / (1e-7 + n_total) * 100 + LOG.info(f"Percent of optimizer states zeroed: {_zeroed:.2f}") + LOG.info(f"absolute n of optimizer states zeroed: {n_zeros}") + + +class ReLoRACallback(TrainerCallback): + """Callback to merge LoRA weights into the base model and save full-weight checkpoints""" + + def __init__(self, cfg: DictDefault): + self.relora_steps = cfg.relora_steps + self.cpu_offload = cfg.relora_cpu_offload + self.quantized = cfg.load_in_4bit or cfg.load_in_8bit + self.last_full_model = cfg.base_model + self.resume_from_checkpoint = cfg.resume_from_checkpoint + + if not os.path.exists(self.last_full_model): + self.last_full_model = str(Path(snapshot_download(cfg.base_model))) + + assert os.path.exists( + self.last_full_model + ), "for ReLORA base_model must be a local path" + + self.num_lora_restarts = 0 + self.need_full_save = False + + def on_train_begin( + self, + _args: TrainingArguments, + _state: 
TrainerState, + control: TrainerControl, + model: peft.LoraModel, + **_kwargs, + ): + if self.resume_from_checkpoint: + weight_path = os.path.join(self.resume_from_checkpoint, "relora") + if not os.path.exists(weight_path): + LOG.warning( + "Resuming ReLoRA from checkpoint, but no full-weight save found" + ) + else: + LOG.info(f"Loading adjusted base weights from {weight_path}") + load_weight_checkpoint(model, weight_path) + return control + + def on_step_begin( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + model: peft.LoraModel, + optimizer: torch.optim.Optimizer, + **_kwargs, + ): + if state.global_step > 0 and state.global_step % self.relora_steps == 0: + checkpoint_folder = os.path.join( + args.output_dir, + f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}", + "relora", + ) + + if "adam" in args.optim.lower(): + optimizer_state_keys = ["exp_avg", "exp_avg_sq"] + else: + raise ValueError(f"Optimizer {args.optim} not supported with ReLoRA") + + lora_params = [ + n + for n, p in model.named_parameters() + if p.requires_grad and "lora_" in n + ] + + model.save_pretrained( + os.path.join( + args.output_dir, + f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}", + "adapter", + ), + safe_serialization=True, + ) + with torch.no_grad(): + merge_and_save( + model, + self.last_full_model, + checkpoint_folder, + reinit=True, + quantized=self.quantized, + actually_save=is_main_process(), + cpu_offload=self.cpu_offload, + ) + reset_optimizer( + optimizer, + reset_params=lora_params, + optimizer_state_keys=optimizer_state_keys, + prune_ratio=args.relora_prune_ratio, + ) + + if self.quantized: + self.last_full_model = checkpoint_folder + self.num_lora_restarts += 1 + + return control + + def on_save( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + model: peft.LoraModel, + **_kwargs, + ): + checkpoint_folder = os.path.join( + args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}", "relora" + ) + if ( + state.global_step >= self.relora_steps + and state.global_step % self.relora_steps != 0 + ): + if self.quantized: + if is_main_process() and self.last_full_model != checkpoint_folder: + # ensure the latest full parameter save is in the latest checkpoint + # folder, so that automatic pruning of checkpoints does not remove it + LOG.info(f"moving last full parameter save to {checkpoint_folder}") + os.makedirs(checkpoint_folder, exist_ok=True) + chunks = glob.glob( + f"{self.last_full_model}/model*.safetensors" + ) + glob.glob(f"{self.last_full_model}/model*.index.json") + for path in chunks: + new_path = os.path.abspath(shutil.move(path, checkpoint_folder)) + try: + os.symlink(new_path, path) + except OSError: + # probably on windows without permission to symlink + pass + + self.last_full_model = checkpoint_folder + else: + model.model.save_pretrained(checkpoint_folder, safe_serialization=True) + + return control + + def on_log( + self, + _args: TrainingArguments, + _state: TrainerState, + control: TrainerControl, + logs: Dict[str, float], + **_kwargs, + ): + logs["num_lora_restarts"] = self.num_lora_restarts + return control + + def on_train_end( + self, + args: TrainingArguments, + _state: TrainerState, + control: TrainerControl, + model: peft.LoraModel, + **_kwargs, + ): + if self.quantized: + # perform final merge and save + with torch.no_grad(): + merge_and_save( + model, + self.last_full_model, + args.output_dir, + reinit=False, + quantized=self.quantized, + actually_save=is_main_process(), + cpu_offload=self.cpu_offload, 
+ ) + # no need to save if unquantized, as finetune.py will call merge_and_unload() + return control + + +class ReLoRAScheduler(LRScheduler): + """Wraps another scheduler to apply per-lora-restart learning rate warmups.""" + + def __init__( + self, + optimizer: Optimizer, + inner_schedule: LRScheduler, + relora_steps: int, + warmup_steps: int, + anneal_steps: int = 1, + min_lr_scale: float = 0.001, + ) -> None: + self.inner_schedule = inner_schedule + self.relora_steps = relora_steps + self.warmup_steps = warmup_steps + self.anneal_steps = anneal_steps + self.min_lr_scale = min_lr_scale + super().__init__(optimizer, inner_schedule.last_epoch, inner_schedule.verbose) + + def get_lr(self) -> float: + self.inner_schedule.last_epoch = self.last_epoch + + original = self.inner_schedule.get_lr() + step = self.last_epoch + + if step < self.relora_steps - self.warmup_steps: + scale = 1 + else: + per_relora_progress = step % self.relora_steps + if per_relora_progress < self.warmup_steps: + cycle_t = min(1.0, (per_relora_progress) / self.warmup_steps) + elif per_relora_progress > (self.relora_steps - self.anneal_steps): + cycle_t = min( + 1.0, + (self.relora_steps - per_relora_progress) / self.anneal_steps, + ) + else: + cycle_t = 1 + scale = cycle_t * (1 - self.min_lr_scale) + self.min_lr_scale + + if isinstance(original, Sequence): + return [lr * scale for lr in original] + return original * scale + + +def sharded_paths(path: str, module_names: List[str]) -> Dict[str, str]: + model_name = "model.safetensors" + if not os.path.exists(str(Path(path) / model_name)) and not os.path.exists( + str(Path(path) / f"{model_name}.index.json") + ): + model_name = "pytorch_model.bin" + + index_path = str(Path(path) / f"{model_name}.index.json") + if os.path.exists(index_path): + with open(index_path, "r", encoding="utf-8") as file: + data = json.load(file) + return data["weight_map"] + return {(module_name + ".weight"): model_name for module_name in module_names} + + +def lora_delta_weight(layer: peft.tuners.lora.LoraLayer, device) -> torch.Tensor: + if isinstance(layer, (peft.tuners.lora.Linear8bitLt, peft.tuners.lora.Linear4bit)): + adapter: Union[List[str], str] = layer.active_adapter + if isinstance(adapter, list): + if len(adapter) > 1: + raise ValueError("unhandled relora for multiple adapters") + adapter = adapter[0] + return ( + peft.utils.transpose( + layer.lora_B[adapter].weight.detach().to(device) + @ layer.lora_A[adapter].weight.detach().to(device), + getattr(layer, "fan_in_fan_out", False), + ) + * layer.scaling[adapter] + ) + + raise ValueError("unhandled lora layer type") + + +def find_lora_modules(model: peft.LoraModel) -> Dict[str, peft.tuners.lora.LoraLayer]: + modules: Dict[str, peft.tuners.lora.LoraLayer] = {} + + key_list = [key for key, _ in model.model.named_modules() if "lora" not in key] + for key in key_list: + try: + # pylint: disable=protected-access + _parent, target, _target_name = peft.utils._get_submodules(model.model, key) + except AttributeError: + continue + + if isinstance(target, peft.tuners.lora.LoraLayer): + modules[key] = target + + return modules + + +def update_weights( + target: peft.tuners.lora.LoraLayer, new_weight: torch.Tensor, reinit: bool, device +): + if reinit: + for adapter_name in target.lora_A: + target.reset_lora_parameters(adapter_name, True) + for adapter_name in target.lora_embedding_A: + target.reset_lora_parameters(adapter_name, True) + + if isinstance(target, peft.tuners.lora.Linear4bit): + # This could be faster, but the quantization of Linear4bit 
weights occurs + # when the module is moved from cpu to gpu. Without meddling *too* deeply in + # PEFT's innards or maintaining a duplicate of that codepath, this is good + # enough for now. + target.weight.quant_state = None + target.weight.data = new_weight.cpu() + target.to(device) + elif isinstance(target, peft.tuners.lora.Linear8bitLt): + target.weight.data = ( + bnb.nn.Int8Params(new_weight, requires_grad=False).to(device).data + ) + else: + target.weight.data = new_weight.to(device) + + +def merge_and_save( + model: peft.LoraModel, + model_src: str, + model_dst: str, + reinit: bool = False, + quantized: bool = False, + cpu_offload: bool = False, + actually_save: bool = True, +): + modules = find_lora_modules(model) + + if not quantized: + for module_name, target in modules.items(): + active_adapter = target.active_adapter + if isinstance(active_adapter, list): + active_adapter = active_adapter[0] + update = target.get_delta_weight(active_adapter).detach() + target.weight.data += update + + if reinit: + for adapter_name in target.lora_A: + target.reset_lora_parameters(adapter_name, True) + for adapter_name in target.lora_embedding_A: + target.reset_lora_parameters(adapter_name, True) + return + + os.makedirs(model_dst, exist_ok=True) + shard_paths = sharded_paths(model_src, modules.keys()) + out_shard_paths = {} + + unique_shards = list(set(shard_paths.values())) + for shard_path in unique_shards: + out_tensors = {} + if shard_path.endswith(".safetensors"): + in_tensors = st.load_file(str(Path(model_src) / shard_path)) + else: + in_tensors = torch.load(Path(model_src) / shard_path) + if "state_dict" in in_tensors: + in_tensors = in_tensors["state_dict"] + + for module_name, target in modules.items(): + key = module_name + ".weight" + if key not in shard_paths or shard_paths[key] != shard_path: + continue + + orig_weight = in_tensors[key] + old_dev = target.weight.device + math_dev = "cpu" if cpu_offload else old_dev + + delta_weight = lora_delta_weight(target, math_dev) + new_weight = orig_weight.to(math_dev) + delta_weight + del delta_weight + + if actually_save: + out_tensors[key] = new_weight.half().cpu() + + update_weights(target, new_weight, reinit=reinit, device=old_dev) + + if actually_save: + out_shard_name = shard_path + if out_shard_name.startswith("pytorch_model"): + out_shard_name = ( + out_shard_name.replace("pytorch_model", "model").rstrip(".bin") + + ".safetensors" + ) + + for module_name in in_tensors: + if module_name not in out_tensors: + out_tensors[module_name] = in_tensors[module_name].half() + out_shard_paths[module_name] = out_shard_name + + shard_fn = str(Path(model_dst) / out_shard_name) + LOG.info(f"saving tensors to {shard_fn}") + st.save_file(out_tensors, shard_fn, metadata={"format": "pt"}) + + barrier() + del in_tensors + del out_tensors + torch.cuda.empty_cache() + + if actually_save and len(unique_shards) > 1: + with open( + str(Path(model_dst, "model.safetensors.index.json")), "w", encoding="utf-8" + ) as file: + json.dump({"metadata": {}, "weight_map": out_shard_paths}, file) + + +def load_weight_checkpoint(model: peft.LoraModel, checkpoint_path: str): + modules = find_lora_modules(model) + shard_paths = sharded_paths(checkpoint_path, modules.keys()) + unique_shards = list(set(shard_paths.values())) + + for shard_path in unique_shards: + tensors = st.load_file(os.path.join(checkpoint_path, shard_path)) + + for module_name, target in modules.items(): + key = module_name + ".weight" + if key not in shard_paths or shard_paths[key] != shard_path: + 
continue + + new_weight = tensors[key] + update_weights( + target, new_weight, reinit=False, device=target.weight.device + ) diff --git a/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py new file mode 100644 index 0000000000000000000000000000000000000000..0269f90157b12c46a460545842d69da65afc394a --- /dev/null +++ b/src/axolotl/monkeypatch/stablelm_attn_hijack_flash.py @@ -0,0 +1,415 @@ +# coding=utf-8 +# Copyright 2023 Stability AI, EleutherAI, and The HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This code is based off the following work: +# https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py +# https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt_neox/modeling_gpt_neox.py +""" PyTorch StableLM Epoch model. """ +import importlib +import math +from typing import Optional, Tuple, Union + +import torch +import torch.utils.checkpoint +from accelerate import init_empty_weights +from einops import rearrange +from flash_attn.flash_attn_interface import ( # pylint: disable=ungrouped-imports + flash_attn_varlen_qkvpacked_func, +) +from torch import nn +from transformers import AutoConfig, AutoModelForCausalLM +from transformers.modeling_outputs import BaseModelOutputWithPast +from transformers.utils import logging + +from axolotl.monkeypatch.utils import get_cu_seqlens_from_pos_ids + +logger = logging.get_logger(__name__) + + +def replace_stablelm_attn_with_flash_attn(model_name="stabilityai/stablelm-3b-4e1t"): + # this is a wonky hack to get the remotely loaded module + model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True) + # we need to load the model here in order for modeling_stablelm_epoch to be available + with init_empty_weights(): + AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) + module_name = model_config.__class__.__module__.replace( + ".configuration_stablelm_epoch", ".modeling_stablelm_epoch" + ) + modeling_stablelm = importlib.import_module(module_name) + modeling_stablelm.Attention.forward = ( # pylint: disable=protected-access + flashattn_attn + ) + modeling_stablelm.StableLMEpochModel.forward = ( # pylint: disable=protected-access + stablelm_model_forward + ) + modeling_stablelm.DecoderLayer.forward = ( # pylint: disable=protected-access + decoder_layer_forward + ) + + +def rotate_half(x: torch.Tensor): + """Rotates half the hidden dims of the input.""" + # pylint: disable=invalid-name + x1, x2 = torch.chunk(x, 2, dim=-1) + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb(q, k, cos, sin, position_ids): + # The first two dimensions of cos and sin are always 1, so we can `squeeze` them. 
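+ # After the squeeze, cos/sin are gathered per position id and re-expanded to
+ # [batch_size, 1, seq_len, dim] so they broadcast over the head dimension when the
+ # rotation q * cos + rotate_half(q) * sin is applied below.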
+ # pylint: disable=invalid-name + cos = cos.squeeze(1).squeeze(0) # [seq_len, dim] + sin = sin.squeeze(1).squeeze(0) # [seq_len, dim] + cos = cos[position_ids].unsqueeze(1) # [batch_size, 1, seq_len, dim] + sin = sin[position_ids].unsqueeze(1) # [batch_size, 1, seq_len, dim] + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, None, :, :].expand( + batch, num_key_value_heads, n_rep, slen, head_dim + ) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + + +def flashattn_attn( + self, + hidden_states: torch.FloatTensor, + attention_mask: torch.FloatTensor, + position_ids: torch.LongTensor, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, # pylint: disable=unused-argument + use_cache: Optional[bool] = False, + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: Optional[torch.Tensor] = None, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + query_states = query_states.view( + bsz, q_len, self.num_heads, self.head_dim + ).transpose(1, 2) + key_states = key_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + value_states = value_states.view( + bsz, q_len, self.num_key_value_heads, self.head_dim + ).transpose(1, 2) + + query_rot = query_states[..., : self.rotary_ndims] + query_pass = query_states[..., self.rotary_ndims :] + key_rot = key_states[..., : self.rotary_ndims] + key_pass = key_states[..., self.rotary_ndims :] + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + kv_seq_len += past_key_value[0].shape[-2] + cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len) + query_states, key_states = apply_rotary_pos_emb( + query_rot, key_rot, cos, sin, position_ids + ) + + # [batch_size, num_heads, seq_len, head_dim] + query_states = torch.cat((query_states, query_pass), dim=-1) + key_states = torch.cat((key_states, key_pass), dim=-1) + + if past_key_value is not None: + # Reuse k, v, self_attention + key_states = torch.cat((past_key_value[0], key_states), dim=2) + value_states = torch.cat((past_key_value[1], value_states), dim=2) + + past_key_value = (key_states, value_states) if use_cache else None + + # Repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + + if cu_seqlens is not None and max_seqlen is not None and cu_seqlens.dim() == 1: + # special handling using sample packing + qkv = torch.stack( + [query_states, key_states, value_states], dim=2 + ) # [bsz, nh, 3, q_len, hd] + qkv = qkv.transpose(1, 3) # [bsz, q_len, 3, nh, hd] + qkv = rearrange(qkv, "b s ... 
-> (b s) ...") + softmax_scale = None + + output = flash_attn_varlen_qkvpacked_func( + qkv, cu_seqlens, max_seqlen, 0.0, softmax_scale=softmax_scale, causal=True + ) + + attn_output = rearrange(output, "(b s) ... -> b s ...", b=bsz) + attn_output = rearrange(attn_output, "b s h d -> b s (h d)") + else: + attn_weights = torch.matmul( + query_states, key_states.transpose(2, 3) + ) / math.sqrt(self.head_dim) + + if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len): + raise ValueError( + f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is" + f" {attn_weights.size()}" + ) + + if attention_mask is not None: + if attention_mask.size() != (bsz, 1, q_len, kv_seq_len): + raise ValueError( + f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}" + ) + attn_weights = attn_weights + attention_mask + + # Upcast attention to fp32 + attn_weights = nn.functional.softmax( + attn_weights, dim=-1, dtype=torch.float32 + ).to(query_states.dtype) + attn_output = torch.matmul(attn_weights, value_states) + + if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim): + raise ValueError( + f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is" + f" {attn_output.size()}" + ) + + # Merge heads + attn_output = attn_output.transpose(1, 2).contiguous() + attn_output = attn_output.reshape(bsz, q_len, self.hidden_size) + + # Final linear projection + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value + + +def decoder_layer_forward( + self, + hidden_states: Optional[torch.FloatTensor], + attention_mask: Optional[torch.FloatTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Tuple[torch.Tensor]] = None, + output_attentions: Optional[bool] = False, + use_cache: Optional[bool] = False, + cu_seqlens: Optional[torch.Tensor] = None, + max_seqlen: Optional[torch.Tensor] = None, +) -> Union[ + Tuple[torch.Tensor], Optional[Tuple[torch.Tensor, Tuple[torch.FloatTensor, ...]]] +]: + # pylint: disable=duplicate-code + residual = hidden_states + + hidden_states = self.input_layernorm(hidden_states) + + # Self Attention + hidden_states, self_attn_weights, present_key_value = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + + outputs = (hidden_states,) + + if output_attentions: + outputs += (self_attn_weights,) + + if use_cache: + outputs += (present_key_value,) + + return outputs + + +def stablelm_model_forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.FloatTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, +) -> Union[Tuple, BaseModelOutputWithPast]: + # pylint: disable=duplicate-code + output_attentions = ( + output_attentions + if 
output_attentions is not None + else self.config.output_attentions + ) + output_hidden_states = ( + output_hidden_states + if output_hidden_states is not None + else self.config.output_hidden_states + ) + use_cache = use_cache if use_cache is not None else self.config.use_cache + + return_dict = ( + return_dict if return_dict is not None else self.config.use_return_dict + ) + + # Retrieve input_ids and inputs_embeds + if input_ids is not None and inputs_embeds is not None: + raise ValueError( + "You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time" + ) + if input_ids is not None: + batch_size, seq_length = input_ids.shape + elif inputs_embeds is not None: + batch_size, seq_length, _ = inputs_embeds.shape + else: + raise ValueError( + "You have to specify either decoder_input_ids or decoder_inputs_embeds" + ) + + seq_length_with_past = seq_length + past_key_values_length = 0 + + if past_key_values is not None: + past_key_values_length = past_key_values[0][0].shape[2] + seq_length_with_past = seq_length_with_past + past_key_values_length + + cu_seqlens = None + max_seqlen = None + if position_ids is None: + device = input_ids.device if input_ids is not None else inputs_embeds.device + position_ids = torch.arange( + past_key_values_length, + seq_length + past_key_values_length, + dtype=torch.long, + device=device, + ) + position_ids = position_ids.unsqueeze(0).view(-1, seq_length) + else: + position_ids = position_ids.view(-1, seq_length).long() + cu_seqlens, max_seqlen = get_cu_seqlens_from_pos_ids(position_ids) + cu_seqlens = cu_seqlens.squeeze() + + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + # Embed positions + if attention_mask is None: + attention_mask = torch.ones( + (batch_size, seq_length_with_past), + dtype=torch.bool, + device=inputs_embeds.device, + ) + attention_mask = ( + self._prepare_decoder_attention_mask( # pylint: disable=protected-access + attention_mask, + (batch_size, seq_length), + inputs_embeds, + past_key_values_length, + ) + ) + + hidden_states = inputs_embeds + + if self.gradient_checkpointing and self.training: + if use_cache: + logger.warning( + "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." 
+ ) + use_cache = False + + # Decoder layers + all_hidden_states = () if output_hidden_states else None + all_self_attns = () if output_attentions else None + next_decoder_cache = () if use_cache else None + + for idx, decoder_layer in enumerate(self.layers): + if output_hidden_states: + all_hidden_states += (hidden_states,) + + past_key_value = past_key_values[idx] if past_key_values is not None else None + + if self.gradient_checkpointing and self.training: + + def create_custom_forward(module): + def custom_forward(*inputs): + # None for past_key_value + return module(*inputs) + + return custom_forward + + layer_outputs = torch.utils.checkpoint.checkpoint( + create_custom_forward(decoder_layer), + hidden_states, + attention_mask, + position_ids, + past_key_value, + output_attentions, + None, + cu_seqlens, + max_seqlen, + ) + else: + layer_outputs = decoder_layer( + hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + cu_seqlens=cu_seqlens, + max_seqlen=max_seqlen, + ) + + hidden_states = layer_outputs[0] + + if use_cache: + next_decoder_cache += (layer_outputs[2 if output_attentions else 1],) + + if output_attentions: + all_self_attns += (layer_outputs[1],) + + hidden_states = self.norm(hidden_states) + + # Add hidden states from the last decoder layer + if output_hidden_states: + all_hidden_states += (hidden_states,) + + next_cache = next_decoder_cache if use_cache else None + if not return_dict: + return tuple( + v + for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] + if v is not None + ) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=next_cache, + hidden_states=all_hidden_states, + attentions=all_self_attns, + ) diff --git a/src/axolotl/monkeypatch/utils.py b/src/axolotl/monkeypatch/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e43c58650a9143163b7461c228d0b320cd2fe4bc --- /dev/null +++ b/src/axolotl/monkeypatch/utils.py @@ -0,0 +1,227 @@ +""" +Shared utils for the monkeypatches +""" +from typing import Optional + +import torch +import torch.nn.functional as F +from transformers.modeling_attn_mask_utils import ( + _prepare_4d_causal_attention_mask, + _prepare_4d_causal_attention_mask_for_sdpa, +) +from transformers.utils import is_torch_bf16_gpu_available + + +@torch.jit.script +def get_max_seqlen_in_batch(attention_mask: torch.Tensor) -> torch.Tensor: + max_num = int(torch.max(attention_mask).item()) + batch_size, _ = attention_mask.shape + counts = torch.zeros((batch_size, max_num), dtype=torch.int32) + + for i in range(1, max_num + 1): + mask = attention_mask == i + counts[:, i - 1] = torch.sum(mask, dim=-1).to(dtype=torch.int32) + + result = counts.flatten() + nonzero_indices = torch.nonzero(result).squeeze(-1) + return result[nonzero_indices] + + +@torch.jit.script +def get_unpad_data(attention_mask: torch.Tensor): + device = attention_mask.device + seqlens_in_batch = get_max_seqlen_in_batch(attention_mask) + indices = torch.nonzero(attention_mask.flatten()).flatten() + max_seqlen_in_batch = seqlens_in_batch.max().item() + cu_seqlens = ( + F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0)) + .to(device=device) + .detach() + ) + return ( + indices, + cu_seqlens, + max_seqlen_in_batch, + ) + + +def get_cu_seqlens(attn_mask): + """generate a cumulative sequence length mask for flash attention using attn mask""" + if len(attn_mask.shape) == 1: + 
attn_mask = attn_mask.unsqueeze(0) + + device = attn_mask.device + results = [] + max_seq_lens = [] + + for row in attn_mask: + # Exclude zeros to avoid adding their positions to the mask + t_non_zeros = row[row != 0] + # Find where the sequence number changes (including the first position) + seq_change = torch.cat( + [ + torch.tensor([1], dtype=torch.int32, device=device), + t_non_zeros[1:] != t_non_zeros[:-1], + ] + ) + # Get the indices where the sequence changes + change_indices = torch.cat( + [ + (seq_change == 1).nonzero(as_tuple=True)[0], + torch.tensor([len(t_non_zeros)], dtype=torch.int32, device=device), + ] + ) + # Calculate the sequence lengths + seq_lengths = change_indices[1:] - change_indices[:-1] + # Calculate the length of the final sequence or padding + final_seq_length = len(row) - change_indices[-1] + # Append the length of the final sequence or padding to seq_lengths + if final_seq_length.item(): + seq_lengths = torch.cat( + [ + seq_lengths, + torch.tensor( + [final_seq_length.item()], dtype=torch.int32, device=device + ), + ] + ) + # Calculate the cumulative sequence lengths + cu_seqlens = torch.cat( + [torch.tensor([0], dtype=torch.int32, device=device), seq_lengths.cumsum(0)] + ) + max_seq_len = (cu_seqlens[1:] - cu_seqlens[:-1]).max() + results.append(cu_seqlens) + max_seq_lens.append(max_seq_len) + + return torch.stack(results).to(dtype=torch.int32), torch.stack(max_seq_lens) + + +def get_cu_seqlens_from_pos_ids(position_ids): + """generate a cumulative sequence length mask for flash attention using pos ids""" + if len(position_ids.shape) == 1: + position_ids = position_ids.unsqueeze(0) + + device = position_ids.device + results = [] + max_seq_lens = [] + + for row in position_ids: + # Count the number of consecutive zeros from the right side + padding_length = (row == 0).int().flip(dims=[0]).cumprod(dim=0).sum().item() + + # Adjust the row to exclude padding + adjusted_row = row[:-padding_length] if padding_length else row.clone() + + # Find where the position resets to 0 (indicating a new sequence) + seq_starts = torch.cat( + [ + torch.tensor([True], dtype=torch.bool, device=device), + adjusted_row[1:] == 0, + ] + ) + # Get the indices where the sequence starts + start_indices = torch.cat( + [ + torch.nonzero(seq_starts).unbind(dim=1)[0], + torch.tensor([len(adjusted_row)], dtype=torch.int32, device=device), + ] + ) + # Calculate the sequence lengths + seq_lengths = start_indices[1:] - start_indices[:-1] + # Calculate the cumulative sequence lengths + cu_seqlens = torch.cat( + [torch.tensor([0], dtype=torch.int32, device=device), seq_lengths.cumsum(0)] + ) + # Append the padding length to the cumulative sequence lengths + if padding_length: + cu_seqlens = torch.cat( + [cu_seqlens, torch.tensor([len(row)], dtype=torch.int32, device=device)] + ) + max_seq_len = (cu_seqlens[1:] - cu_seqlens[:-1]).max() + results.append(cu_seqlens) + max_seq_lens.append(max_seq_len) + + # Find the maximum value across all tensors + max_value = max(t.max() for t in results) + + # Find the length of the longest tensor + max_length = max(t.size(0) for t in results) + + # Pad each tensor to the same length and collect them in a list + padded_results = [ + F.pad(t, (0, max_length - t.size(0)), "constant", max_value) for t in results + ] + + return torch.stack(padded_results).to(dtype=torch.int32), torch.stack(max_seq_lens) + + +def set_module_name(model, name, value): + if "." 
in name: + parent_name = name.rsplit(".", 1)[0] + child_name = name[len(parent_name) + 1 :] + parent = model.get_submodule(parent_name) + else: + parent_name = "" + parent = model + child_name = name + + setattr(parent, child_name, value) + + +def mask_2d_to_4d( + mask: torch.Tensor, dtype: torch.dtype, tgt_len: Optional[int] = None +): + """ + Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. + This expansion handles packed sequences so that sequences share the same attention mask integer value + when they attend to each other within that sequence. + This expansion transforms the mask to lower triangular form to prevent future peeking. + """ + bsz, src_len = mask.size() + tgt_len = tgt_len if tgt_len is not None else src_len + + mask = mask.unsqueeze(1).unsqueeze(2) + mask = mask.expand(bsz, 1, tgt_len, src_len) + + # Create a binary mask from the original mask where zeros remain zeros and all other values are set to one + binary_mask = torch.where( + mask != 0, + torch.tensor(1, device=mask.device).to(dtype), + torch.tensor(0, device=mask.device).to(dtype), + ) + + # Create a block-diagonal mask. + # we multiply by the binary mask so that 0's in the original mask are correctly excluded + zero_one_mask = torch.eq(mask, mask.transpose(-1, -2)).int() * binary_mask + + # Now let's create a lower triangular mask of ones that will zero out the upper triangular part + lower_triangular_ones = torch.tril(torch.ones((tgt_len, src_len), dtype=dtype)).to( + mask.device + ) + + # Use the lower triangular mask to zero out the upper triangular part of the zero_one_mask + masked_zero_one_mask = zero_one_mask * lower_triangular_ones + + return masked_zero_one_mask + + +def patched_prepare_4d_causal_attention_mask( + attention_mask: Optional[torch.Tensor], + *args, +): + dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float32 + return _prepare_4d_causal_attention_mask( + mask_2d_to_4d(attention_mask, dtype=dtype), + *args, + ) + + +def patched_prepare_4d_causal_attention_mask_for_sdpa( + attention_mask: Optional[torch.Tensor], + *args, +): + dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float32 + return _prepare_4d_causal_attention_mask_for_sdpa( + mask_2d_to_4d(attention_mask, dtype=dtype), + *args, + ) diff --git a/src/axolotl/prompt_strategies/__init__.py b/src/axolotl/prompt_strategies/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e62a5c20ce4dca7e25a47d73c2803f96f86b563e --- /dev/null +++ b/src/axolotl/prompt_strategies/__init__.py @@ -0,0 +1,26 @@ +"""Module to load prompt strategies.""" + +import importlib +import inspect + +from axolotl.prompt_strategies.user_defined import UserDefinedDatasetConfig + + +def load(strategy, tokenizer, cfg, ds_cfg): + try: + load_fn = "load" + if strategy.split(".")[-1].startswith("load_"): + load_fn = strategy.split(".")[-1] + strategy = ".".join(strategy.split(".")[:-1]) + mod = importlib.import_module(f".{strategy}", "axolotl.prompt_strategies") + func = getattr(mod, load_fn) + load_kwargs = {} + if strategy == "user_defined": + load_kwargs["ds_cfg"] = UserDefinedDatasetConfig(**ds_cfg) + else: + sig = inspect.signature(func) + if "ds_cfg" in sig.parameters: + load_kwargs["ds_cfg"] = ds_cfg + return func(tokenizer, cfg, **load_kwargs) + except Exception: # pylint: disable=broad-exception-caught + return None diff --git a/src/axolotl/prompt_strategies/alpaca_chat.py b/src/axolotl/prompt_strategies/alpaca_chat.py new file mode 100644 index 
0000000000000000000000000000000000000000..975fee889e1a2168508f64611781ab9bb537b114 --- /dev/null +++ b/src/axolotl/prompt_strategies/alpaca_chat.py @@ -0,0 +1,120 @@ +"""Module for Alpaca prompt strategy classes""" + +from typing import Any, Dict, Optional, Tuple + +from axolotl.prompt_tokenizers import ( + AlpacaPromptTokenizingStrategy, + InstructionPromptTokenizingStrategy, +) +from axolotl.prompters import AlpacaPrompter, PromptStyle, UnpromptedPrompter + + +def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + prompt_style = PromptStyle.CHAT.value + if ds_cfg and "conversation" in ds_cfg: + prompt_style = ds_cfg["conversation"] + + return AlpacaPromptTokenizingStrategy( + AlpacaPrompter(prompt_style), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +class AlpacaConcisePrompter(AlpacaPrompter): + """ + Alpaca Prompter extending the system prompt to ask for concise chat-instruct answers + """ + + system_prompt = "Below is an instruction from a USER that describes a task, paired with an input that provides further context. The ASSISTANT writes a response that concisely and appropriately completes the request.\n\n" + system_no_input_prompt = "Below is an instruction from a USER that describes a task. The ASSISTANT writes a response that appropriately and concisely completes the request.\n\n" + + +class AlpacaChatPrompter(AlpacaPrompter): + """ + Alpaca Chat Prompter extending the system prompt for chat-instruct answers + """ + + system_prompt = "Below is an instruction from a USER that describes a task, paired with an input that provides further context. The ASSISTANT writes a response that concisely and appropriately completes the request.\n\n" + system_no_input_prompt = "Below is an instruction from a USER that describes a task. 
The ASSISTANT writes a response that appropriately and concisely completes the request.\n\n" + + def __init__(self): # pylint: disable=super-init-not-called + self.prompt_style = PromptStyle.CHAT.value + self.match_prompt_style() + + +class NoSystemPrompter(AlpacaPrompter): + """ + Null Prompter with no system prompts + """ + + system_prompt = "" + system_no_input_prompt = "" + turn_format = "{instruction} {input} " + turn_no_input_format = "{instruction} " + + def __init__(self): # pylint: disable=super-init-not-called + pass + + +class AlpacaQAPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for AlpacaQA + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["question"], + "", + prompt["answer"], + ) + + +class CamelAIPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for CamelAI datasets + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["message_1"], + "", + prompt["message_2"], + ) + + +def load_concise(tokenizer, cfg): + return AlpacaPromptTokenizingStrategy( + AlpacaConcisePrompter(PromptStyle.CHAT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_qa(tokenizer, cfg): + return AlpacaQAPromptTokenizingStrategy( + AlpacaChatPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_camel_ai(tokenizer, cfg): + return CamelAIPromptTokenizingStrategy( + AlpacaChatPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_no_prompt(tokenizer, cfg): + return AlpacaPromptTokenizingStrategy( + UnpromptedPrompter(PromptStyle.CHAT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/alpaca_instruct.py b/src/axolotl/prompt_strategies/alpaca_instruct.py new file mode 100644 index 0000000000000000000000000000000000000000..143f070f2a9f826f6ed83e64305760229f535173 --- /dev/null +++ b/src/axolotl/prompt_strategies/alpaca_instruct.py @@ -0,0 +1,22 @@ +"""Module loading the AlpacaInstructPromptTokenizingStrategy class""" + +from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter, PromptStyle, UnpromptedPrompter + + +def load(tokenizer, cfg): + return AlpacaPromptTokenizingStrategy( + AlpacaPrompter(PromptStyle.INSTRUCT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_no_prompt(tokenizer, cfg): + return AlpacaPromptTokenizingStrategy( + UnpromptedPrompter(PromptStyle.INSTRUCT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/alpaca_w_system.py b/src/axolotl/prompt_strategies/alpaca_w_system.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8cc07435f9e65e5401588aded9c3791b1c6de9 --- /dev/null +++ b/src/axolotl/prompt_strategies/alpaca_w_system.py @@ -0,0 +1,163 @@ +""" +Prompt strategies loader for alpaca instruction datasets with system prompts +""" +from typing import Generator, Tuple, Union + +from axolotl.prompt_tokenizers import PromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter, PromptStyle + + +class InstructionWSystemPromptTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for instruction-based prompts. 
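+ Expects instruction, optional input, output, and system fields; when train_on_inputs is off, the user prompt tokens are masked to -100 in the labels.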
+ """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str, str]: + return ( + prompt["instruction"], + prompt["input"] if "input" in prompt else "", + prompt["output"], + prompt["system"], + ) + + def tokenize_prompt(self, prompt): + # pylint: disable=duplicate-code + ( + instruction, + input, # pylint: disable=redefined-builtin + response, + system, + ) = self.parse_instruction_fields(prompt) + user_prompt = next( + iter( + self.prompter.build_prompt_w_system( + system, + instruction, + input, + ) + ) + ) + tokenized_prompt = self._tokenize(user_prompt, add_eos_token=False) + if not self.train_on_inputs: + user_prompt_len = len(tokenized_prompt["input_ids"]) + # TODO this could be sped up using numpy array slicing + tokenized_prompt["labels"] = [-100] * user_prompt_len + tokenized_res_prompt = self._tokenize( + response, strip_bos_token=True, add_eos_token=True + ) + tokenized_prompt["input_ids"] += tokenized_res_prompt["input_ids"] + tokenized_prompt["attention_mask"] += tokenized_res_prompt["attention_mask"] + tokenized_prompt["labels"] += tokenized_res_prompt["input_ids"] + + return tokenized_prompt + + +class SystemDataPrompter(AlpacaPrompter): + """ + Alpaca Style Prompter that uses system prompts from the dataset + """ + + system_format: str = "### System:\n{system}\n\n" + + def build_prompt_w_system( + self, + system: str, + instruction: str, + input: Union[None, str] = None, # pylint: disable=redefined-builtin + output: Union[None, str] = None, + ) -> Generator[str, None, None]: + # returns the full prompt from instruction and optional input + # if a label (=response, =output) is provided, it's also appended. + formatted_sys_prompt = ( + self.system_format.format(system=system) + if system and self.system_format + else "" + ) + if input: + res = formatted_sys_prompt + self.turn_format.format( + instruction=instruction, input=input + ) + else: + res = formatted_sys_prompt + self.turn_no_input_format.format( + instruction=instruction + ) + if output: + res = f"{res}{output}" + yield res + + +class OpenOrcaSystemDataPrompter(SystemDataPrompter): + """ + Alpaca Style Prompter that uses system prompts from the dataset, with OpenOrca prompts + """ + + def match_prompt_style(self): + # pylint: disable=duplicate-code + if self.prompt_style == PromptStyle.INSTRUCT.value: + self.turn_format = "### Human:\n{instruction}\n### Additional Context:\n{input}\n### Assistant:\n" + self.turn_no_input_format = "### Human:\n{instruction}\n### Assistant:\n" + self.system_format = "### System:\n{system}\n" + if self.prompt_style == PromptStyle.CHAT.value: + self.turn_format = "USER: {instruction}\n{input}\nASSISTANT:" + self.turn_no_input_format = "USER: {instruction}\nASSISTANT:" + self.system_format = "SYSTEM: {system}\n" + if self.prompt_style == PromptStyle.CHATML.value: + self.turn_format = "<|im_start|>user\n{instruction}\n{input}<|im_end|>\n<|im_start|>assistant\n" + self.turn_no_input_format = ( + "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" + ) + self.system_format = "<|im_start|>system\n{system}<|im_end|>\n" + + +class OpenOrcaPromptTokenizingStrategy(InstructionWSystemPromptTokenizingStrategy): + """ + Tokenizing strategy for OpenOrca datasets + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str, str]: + return ( + prompt["question"], + "", + prompt["response"], + prompt["system_prompt"], + ) + + +def load(tokenizer, cfg): + return load_chat(tokenizer, cfg) + + +def load_instruct(tokenizer, cfg): + return 
InstructionWSystemPromptTokenizingStrategy( + SystemDataPrompter(PromptStyle.INSTRUCT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_chat(tokenizer, cfg): + return InstructionWSystemPromptTokenizingStrategy( + SystemDataPrompter(PromptStyle.CHAT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_open_orca(tokenizer, cfg): + return OpenOrcaPromptTokenizingStrategy( + OpenOrcaSystemDataPrompter(PromptStyle.INSTRUCT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_open_orca_chatml(tokenizer, cfg): + return OpenOrcaPromptTokenizingStrategy( + OpenOrcaSystemDataPrompter(PromptStyle.CHATML.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/base.py b/src/axolotl/prompt_strategies/base.py new file mode 100644 index 0000000000000000000000000000000000000000..fce2aba14a8da1b4ba0286fa7839fc77b46a8dfd --- /dev/null +++ b/src/axolotl/prompt_strategies/base.py @@ -0,0 +1,20 @@ +""" +module for base dataset transform strategies +""" + +import importlib +import logging + +LOG = logging.getLogger("axolotl") + + +def load(strategy, cfg, module_base=None, **kwargs): + try: + load_fn = strategy.split(".")[-1] + strategy = ".".join(strategy.split(".")[:-1]) + mod = importlib.import_module(f".{strategy}", module_base) + func = getattr(mod, load_fn) + return func(cfg, **kwargs) + except Exception: # pylint: disable=broad-exception-caught + LOG.warning(f"unable to load strategy {strategy}") + return None diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py new file mode 100644 index 0000000000000000000000000000000000000000..8dff3845b715b9b979a275c078044c27b9ba4184 --- /dev/null +++ b/src/axolotl/prompt_strategies/chat_template.py @@ -0,0 +1,78 @@ +""" +HF Chat Templates prompt strategy +""" +from typing import Any, Dict, Optional + +from axolotl.prompt_tokenizers import PromptTokenizingStrategy +from axolotl.prompters import Prompter +from axolotl.utils.chat_templates import chat_templates + + +class ChatTemplatePrompter(Prompter): + """prompter for HF chat templates""" + + def __init__(self, tokenizer, chat_template=None, max_length=2048): + self.tokenizer = tokenizer + self.chat_template = chat_template + self.max_length = max_length + + def build_prompt(self, conversation, add_generation_prompt=False): + return self.tokenizer.apply_chat_template( + conversation, + truncation=True, + max_length=self.max_length, + add_generation_prompt=add_generation_prompt, + chat_template=self.chat_template, + ) + + +class ChatTemplateStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for instruction-based prompts. 
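+ Applies the tokenizer's chat template to the remapped "conversations" turns; the leading user turn is label-masked unless train_on_inputs is set.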
+ """ + + def tokenize_prompt(self, prompt): + turns = self.get_conversation_thread(prompt) + prompt_ids = self.prompter.build_prompt([turns[0]], add_generation_prompt=True) + input_ids = self.prompter.build_prompt(turns) + + if not self.train_on_inputs: + user_prompt_len = len(prompt_ids) + labels = [-100] * user_prompt_len + input_ids[user_prompt_len:] + else: + labels = input_ids + + tokenized_prompt = { + "input_ids": input_ids, + "labels": labels, + "attention_mask": [1] * len(input_ids), + } + + return tokenized_prompt + + def get_conversation_thread(self, prompt): + conversations = prompt["conversations"] + # remap roles - allow for assistant turn + role_map = { + "human": "user", + "user": "user", + "assistant": "assistant", + "gpt": "assistant", + } + turns = [ + {"role": role_map[t["from"]], "content": t["value"]} for t in conversations + ] + return turns + + +def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + chat_template = ( + ds_cfg["chat_template"] if ds_cfg and "chat_template" in ds_cfg else "chatml" + ) + strategy = ChatTemplateStrategy( + ChatTemplatePrompter(tokenizer, chat_templates(chat_template)), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + return strategy diff --git a/src/axolotl/prompt_strategies/completion.py b/src/axolotl/prompt_strategies/completion.py new file mode 100644 index 0000000000000000000000000000000000000000..3285e667cbc36b03e596e548c0321352b1dc25a6 --- /dev/null +++ b/src/axolotl/prompt_strategies/completion.py @@ -0,0 +1,92 @@ +""" +Basic completion text +""" +from collections import defaultdict +from typing import Any, Dict, Generator, Optional, Tuple + +from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy + + +class CompletionPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Completion prompts. 
+ """ + + _field: str = "text" + + def __init__(self, *args, max_length=None, **kwargs): + super().__init__(*args, **kwargs) + if max_length is not None: + self.max_length = max_length + + @property + def supports_batched(self): + return True + + @property + def field(self) -> str: + return self._field + + @field.setter + def field(self, new_field: str): + self._field = new_field + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt[self.field], + "", + "", + ) + + def tokenize_prompt(self, prompt): + res = defaultdict(lambda: []) + feature_names = list(prompt.keys()) + for row in zip(*prompt.values()): + prompt_row = dict(zip(feature_names, row)) + ( + instruction, + _, + _, + ) = self.parse_instruction_fields(prompt_row) + + full_prompt = self._build_full_prompt(instruction, None, None) + tokenized_full_prompt = self._tokenize(full_prompt) + + for key, val in tokenized_full_prompt.items(): + for i in range(0, len(val), self.sequence_len): + res[key].append(val[i : i + self.sequence_len]) + + return dict(res) + + def _build_full_prompt( + self, instruction, input, response + ): # pylint: disable=redefined-builtin + return next(iter(self.prompter.build_prompt(instruction, input, response))) + + +class CompletionPrompter: + """ + Prompter for completion + """ + + def build_prompt( + self, + instruction: str, + input=None, # pylint: disable=redefined-builtin, unused-argument + output=None, # pylint: disable=unused-argument + ) -> Generator[str, None, None]: + yield instruction + + +def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + strat = CompletionPromptTokenizingStrategy( + CompletionPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + max_length=cfg.sequence_len * 64, + ) + if ds_cfg and "field" in ds_cfg: + strat.field = ds_cfg["field"] + + return strat diff --git a/src/axolotl/prompt_strategies/context_qa.py b/src/axolotl/prompt_strategies/context_qa.py new file mode 100644 index 0000000000000000000000000000000000000000..f87dd8b5cdd0311510f348302c16eb3a0a26c13a --- /dev/null +++ b/src/axolotl/prompt_strategies/context_qa.py @@ -0,0 +1,108 @@ +"""Module containing the classes for Context QA Prompt Tokenization Strategies""" +from typing import Tuple + +from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter, PromptStyle + + +# article, unanswerable_question, question, answer +def load_404(tokenizer, cfg): + return AlpacaMissingInfoContextPromptTokenizingStrategy( + AlpacaContextPrompter(PromptStyle.CHAT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load(tokenizer, cfg): + return AlpacaContextPromptTokenizingStrategy( + AlpacaContextPrompter(PromptStyle.CHAT.value), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_v2(tokenizer, cfg): + return ContextQaV2PromptTokenizingStrategy( + ContextV2Prompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +class AlpacaContextPrompter(AlpacaPrompter): + """ + Customized system prompted for concise QA + """ + + system_prompt = ( + "Use the following contextual information to concisely answer the question.\n" + ) + system_no_input_prompt = ( + "Use the following contextual information to concisely answer the question.\n" + ) + + +class AlpacaContextPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenization Strategy to combine in-context article with a question and answer + """ + + def parse_instruction_fields(self, 
prompt) -> Tuple[str, str, str]: + return ( + prompt["article"] + "\n===\n" + prompt["question"], + "", + prompt["answer"], + ) + + +class ContextQaV2PromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenization Strategy to combine in-context article with a question and answer + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + "Context: " + + prompt["context"] + + "\nQuestion: " + + prompt["question"] + + "\n", + "", + "Answer: " + prompt["answer"], + ) + + +class ContextV2Prompter(AlpacaPrompter): + """ + Customized system prompted for concise QA + """ + + system_prompt = "" + system_no_input_prompt = "" + + def match_prompt_style(self): + # pylint: disable=duplicate-code + self.turn_format = "{instruction}\n{input}" + self.turn_no_input_format = "{instruction}" + self.system_format = "{system}" + + +class AlpacaMissingInfoContextPromptTokenizingStrategy( + InstructionPromptTokenizingStrategy +): + """ + Tokenization Strategy to combine in-context article with a question that can't be answered + from the context and a default response to that effect + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["article"] + "\n===\n" + prompt["unanswerable_question"], + "", + "The context provided does not contain any information about your inquiry. " + "Therefore, I'm unable to answer your question based on the given context.", + ) diff --git a/src/axolotl/prompt_strategies/creative_acr.py b/src/axolotl/prompt_strategies/creative_acr.py new file mode 100644 index 0000000000000000000000000000000000000000..ea67034b3b60cbef4227d12df1b4b32aecc6228a --- /dev/null +++ b/src/axolotl/prompt_strategies/creative_acr.py @@ -0,0 +1,199 @@ +"""Module loading the CreativePromptTokenizingStrategy and similar classes""" + +from typing import Generator, Tuple, Union + +import yaml + +from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy + + +class CreativeAnsweringPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Creative Answering + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + question = prompt["instruction"] + answer = prompt[ + "revision" + ] # don't use prompt[answer], that's data we don't want in the dataset + return ( + question, + "", + answer, + ) + + +class CreativeCritiquePromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Creative Critique + """ + + user_prompt = """Given the following Question and Response, critique the Response on a scale of 1-10. You should critique the answer in the following criteria: +refusal: whether the Response refuses to answer the Question. Responses containing language similar to "As an AI language model, I am not capable of ..." is a form of refusal and should have a low refusal score. 10 is no refusal, 0 is refuses to answer the question. +prescriptive bias: whether the Response attempts to prescribe or dictate certain actions or behaviors of the user. 10 no prescriptive bias present, 0 means there is prescriptive bias. +creativity: score how creative the Response is. 10 is most creative, 0 lacks creativity +comprehensiveness: score how comprehensive and in-depth the Response is. 
10 is most comprehensive and in-depth, 0 lacks coverage and depth +Then for each criteria, give a detailed explanation of why the response doesn't meet the criteria (don't reference the score), what it did right, and how it could be improved for that criteria. + +Output your answer in YAML format like so: +scores: + refusal: <0-10> + prescriptive_bias: <0-10> + creativity: <0-10> + comprehensiveness: <0-10> +critiques: + refusal: + explanation: ... + improvements: ... + prescriptive_bias: + explanation: ... + improvements: ... + creativity: + explanation: ... + improvements: ... + comprehensiveness: + explanation: ... + improvements: ... + +Question: {question} +Answer: {answer} +""" + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + scores = yaml.dump( + prompt["scores"], + default_flow_style=False, + Dumper=yaml.Dumper, + ) + critiques = yaml.dump( + prompt["critiques"], + default_flow_style=False, + Dumper=yaml.Dumper, + ) + evaluation = scores + critiques + question = prompt["instruction"] + answer = prompt["answer"] + return ( + self.user_prompt.format(question=question, answer=answer), + "", + evaluation, + ) + + +class CreativeRevisePromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Creative Revise + """ + + user_prompt = """Definitions: +refusal: whether the Response refuses to answer the Question. Responses containing language similar to "As an AI language model, I am not capable of ..." is a form of refusal and should have a low refusal score. 10 is no refusal, 0 is refuses to answer the question. +prescriptive bias: whether the Response attempts to prescribe or dictate certain actions or behaviors of the user. 10 no prescriptive bias present, 0 means there is prescriptive bias. +creativity: score how creative the Response is. 10 is most creative, 0 lacks creativity +comprehensiveness: score how comprehensive and in-depth the Response is. 10 is most comprehensive and in-depth, 0 lacks coverage and depth + +Given the following Question, Response, and Evaluation, revise the Response based on the Evaluation and recommendations for improvements. Reply only with the revised response. + +Question: {question} +Answer: {answer} +Evaluation: +{evaluation} +""" + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + scores = yaml.dump( + prompt["scores"], + default_flow_style=False, + Dumper=yaml.Dumper, + ) + critiques = yaml.dump( + prompt["critiques"], + default_flow_style=False, + Dumper=yaml.Dumper, + ) + evaluation = scores + critiques + question = prompt["instruction"] + answer = prompt["answer"] + return ( + self.user_prompt.format( + question=question, answer=answer, evaluation=evaluation + ), + "", + prompt["revision"], + ) + + +class CreativePrompterBase: + """ + Base class for Creative Prompters + """ + + system_prompt = "" + prompt_input = "{system_prompt}\nUSER: {instruction}\nASSISTANT:" + + def build_prompt( + self, + instruction: str, + input: Union[ # pylint: disable=redefined-builtin, unused-argument + None, str + ] = None, + output: Union[None, str] = None, + ) -> Generator[str, None, None]: + if self.system_prompt: + res = f"{self.system_prompt}\nUSER: {instruction}\nASSISTANT:" + else: + res = f"USER: {instruction}\nASSISTANT:" + if output: + res = f"{res}{output}" + yield res + + +class CreativeAnswerPrompter(CreativePrompterBase): + """ + Prompter for Creative Answering + """ + + system_prompt = "Answer the following question in a comprehensive, in-depth, and creative way. 
Additionally your response should be relevant, accurate, and free of any ambiguity." + + +class CreativeCritiquePrompter(CreativePrompterBase): + """ + Prompter for Creative Critique + """ + + system_prompt = "" + + +class CreativeRevisePrompter(CreativePrompterBase): + """ + Prompter for Creative Revise + """ + + system_prompt = "" + + +def load_answer(tokenizer, cfg): + return CreativeAnsweringPromptTokenizingStrategy( + CreativeAnswerPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_critique(tokenizer, cfg): + return CreativeCritiquePromptTokenizingStrategy( + CreativeCritiquePrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_revise(tokenizer, cfg): + return CreativeRevisePromptTokenizingStrategy( + CreativeRevisePrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/dpo/__init__.py b/src/axolotl/prompt_strategies/dpo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..7f5e6eb6447e696b060276f961bb1cbb1a151f9e --- /dev/null +++ b/src/axolotl/prompt_strategies/dpo/__init__.py @@ -0,0 +1,8 @@ +""" +module for DPO style dataset transform strategies +""" +from functools import partial + +from ..base import load as load_base + +load = partial(load_base, module_base="axolotl.prompt_strategies.dpo") diff --git a/src/axolotl/prompt_strategies/dpo/chatml.py b/src/axolotl/prompt_strategies/dpo/chatml.py new file mode 100644 index 0000000000000000000000000000000000000000..585696e29a893f23555bddc95d1fb8f49b833c83 --- /dev/null +++ b/src/axolotl/prompt_strategies/dpo/chatml.py @@ -0,0 +1,133 @@ +""" +DPO strategies for chatml +""" + + +def argilla( + cfg, + **kwargs, +): # pylint: disable=possibly-unused-variable,unused-argument + def transform_fn(sample): + if "system" in sample and sample["system"]: + sample["prompt"] = ( + f"<|im_start|>system\n{sample['system']}<|im_end|>\n" + f"<|im_start|>user\n{sample['instruction']}<|im_end|>\n<|im_start|>assistant\n" + ) + else: + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['instruction']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen_response']}<|im_end|>" + sample["rejected"] = f"{sample['rejected_response']}<|im_end|>" + return sample + + return transform_fn + + +def argilla_chat( + cfg, + **kwargs, +): # pylint: disable=possibly-unused-variable,unused-argument + """ + for argilla/dpo-mix-7k conversations + """ + + def transform_fn(sample): + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['chosen'][0]['content']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen'][1]['content']}<|im_end|>" + sample["rejected"] = f"{sample['rejected'][1]['content']}<|im_end|>" + return sample + + return transform_fn + + +def icr( + cfg, + **kwargs, +): # pylint: disable=possibly-unused-variable,unused-argument + """ + chatml transforms for datasets with system, input, chosen, rejected + ex. 
https://huggingface.co/datasets/argilla/distilabel-intel-orca-dpo-pairs + """ + + def transform_fn(sample): + if "system" in sample and sample["system"]: + sample["prompt"] = ( + f"<|im_start|>system\n{sample['system']}<|im_end|>\n" + f"<|im_start|>user\n{sample['input']}<|im_end|>\n<|im_start|>assistant\n" + ) + else: + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['input']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen']}<|im_end|>" + sample["rejected"] = f"{sample['rejected']}<|im_end|>" + return sample + + return transform_fn + + +def intel(cfg, **kwargs): # pylint: disable=possibly-unused-variable,unused-argument + """ + For Intel Orca DPO Pairs + """ + + def transform_fn(sample): + if "system" in sample and sample["system"]: + sample["prompt"] = ( + f"<|im_start|>system\n{sample['system']}<|im_end|>\n" + f"<|im_start|>user\n{sample['question']}<|im_end|>\n<|im_start|>assistant\n" + ) + else: + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['question']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen']}<|im_end|>" + sample["rejected"] = f"{sample['rejected']}<|im_end|>" + return sample + + return transform_fn + + +def prompt_pairs( + cfg, **kwargs +): # pylint: disable=possibly-unused-variable,unused-argument + def transform_fn(sample): + if "system" in sample and sample["system"]: + sample["prompt"] = ( + f"<|im_start|>system\n{sample['system']}<|im_end|>\n" + f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n" + ) + else: + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen']}<|im_end|>" + sample["rejected"] = f"{sample['rejected']}<|im_end|>" + return sample + + return transform_fn + + +def ultra(cfg, **kwargs): # pylint: disable=possibly-unused-variable,unused-argument + """ + for ultrafeedback binarized conversations + """ + + def transform_fn(sample): + if "system" in sample and sample["system"]: + sample["prompt"] = ( + f"<|im_start|>system\n{sample['system']}<|im_end|>\n" + f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n" + ) + else: + sample[ + "prompt" + ] = f"<|im_start|>user\n{sample['prompt']}<|im_end|>\n<|im_start|>assistant\n" + sample["chosen"] = f"{sample['chosen'][1]['content']}<|im_end|>" + sample["rejected"] = f"{sample['rejected'][1]['content']}<|im_end|>" + return sample + + return transform_fn diff --git a/src/axolotl/prompt_strategies/dpo/user_defined.py b/src/axolotl/prompt_strategies/dpo/user_defined.py new file mode 100644 index 0000000000000000000000000000000000000000..1d5f891af69870a80532f879f87d91156f2174a7 --- /dev/null +++ b/src/axolotl/prompt_strategies/dpo/user_defined.py @@ -0,0 +1,41 @@ +""" +User-defined DPO strategies +""" + + +def default(cfg, dataset_idx=0, **kwargs): # pylint: disable=unused-argument + ds_cfg = cfg["datasets"][dataset_idx]["type"] + if not isinstance(ds_cfg, dict): + raise ValueError( + f"User-defined dataset type must be a dictionary. 
Got: {ds_cfg}" + ) + field_prompt = ds_cfg.get("field_prompt", "prompt") + field_system = ds_cfg.get("field_system", "system") + field_chosen = ds_cfg.get("field_chosen", "chosen") + field_rejected = ds_cfg.get("field_rejected", "rejected") + prompt_format = ds_cfg.get("prompt_format") + if not prompt_format: + prompt_format = "{" + field_prompt + "}" + chosen_format = ds_cfg.get("chosen_format") + if not chosen_format: + chosen_format = "{" + field_chosen + "}" + rejected_format = ds_cfg.get("rejected_format") + if not rejected_format: + rejected_format = "{" + field_rejected + "}" + + def transform_fn(sample): + if ( + "{" + field_system + "}" in prompt_format + and field_system in sample + and sample[field_system] + ): + sample["prompt"] = prompt_format.format( + system=sample[field_system], prompt=sample[field_prompt] + ) + else: + sample["prompt"] = prompt_format.format(prompt=sample["prompt"]) + sample["chosen"] = chosen_format.format(chosen=sample[field_chosen]) + sample["rejected"] = rejected_format.format(rejected=sample[field_rejected]) + return sample + + return transform_fn diff --git a/src/axolotl/prompt_strategies/dpo/zephyr.py b/src/axolotl/prompt_strategies/dpo/zephyr.py new file mode 100644 index 0000000000000000000000000000000000000000..9eb8950091146a844efb5870d2dd45a27569abe6 --- /dev/null +++ b/src/axolotl/prompt_strategies/dpo/zephyr.py @@ -0,0 +1,21 @@ +""" +DPO strategies for zephyr +""" + + +def nectar(cfg, **kwargs): # pylint: disable=possibly-unused-variable,unused-argument + def transform_fn(sample): + data = {} + data["prompt"] = ( + "<|system|>\n\n" + "<|user|>\n" + f"{sample['prompt']}\n" + "<|assistant|>\n" + ) + answers = sorted(sample["answers"], key=lambda x: x["rank"]) + data["chosen"] = answers[-1]["answer"] + data["rejected"] = answers[-2]["answer"] + + return data + + return transform_fn diff --git a/src/axolotl/prompt_strategies/input_output.py b/src/axolotl/prompt_strategies/input_output.py new file mode 100644 index 0000000000000000000000000000000000000000..fe14f039cf8e98c92f79e56341ea4c5bf438a24c --- /dev/null +++ b/src/axolotl/prompt_strategies/input_output.py @@ -0,0 +1,54 @@ +"""Module for plain input/output prompt pairs""" +from typing import Generator, Tuple + +from axolotl.prompt_tokenizers import PromptTokenizingStrategy +from axolotl.prompters import IGNORE_TOKEN_ID, Prompter + + +class RawInputOutputStrategy(PromptTokenizingStrategy): + """Prompt Strategy class for input/output pairs""" + + def __init__(self, *args, eos_token=None, **kwargs): + super().__init__(*args, **kwargs) + self.eos_token = eos_token + if not eos_token: + self.eos_token = self.tokenizer.eos_token + + def tokenize_prompt(self, prompt): + # pylint: disable=duplicate-code + input_ids = [] + labels = [] + for label, text in self.prompter.build_prompt(prompt["segments"]): + tokenized_output = self.tokenizer( + text, add_special_tokens=False, return_tensors=None + )["input_ids"] + input_ids += tokenized_output + if label or self.train_on_inputs: + labels += tokenized_output + else: + labels += [IGNORE_TOKEN_ID] * len(tokenized_output) + + tokenized_prompt = { + "input_ids": input_ids, + "labels": labels, + "attention_mask": [1] * len(input_ids), + } + + return tokenized_prompt + + +class RawInputOutputPrompter(Prompter): + """prompter for raw i/o data""" + + def build_prompt(self, source) -> Generator[Tuple[bool, str], None, None]: + for segment in source: + yield segment["label"], segment["text"] + + +def load(tokenizer, cfg): + return RawInputOutputStrategy( + 
RawInputOutputPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/instruct.py b/src/axolotl/prompt_strategies/instruct.py new file mode 100644 index 0000000000000000000000000000000000000000..3d636748903ef1b6a06d81511c00d3545101ef80 --- /dev/null +++ b/src/axolotl/prompt_strategies/instruct.py @@ -0,0 +1,33 @@ +"""Module containing the InstructShareGPTPromptTokenizingStrategy class""" +from typing import Any, Dict, Optional + +from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy +from axolotl.prompters import ShareGPTPrompterV2 + + +def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + conversation = ( + ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None + ) + strategy = InstructShareGPTPromptTokenizingStrategy( + # pylint: disable=duplicate-code + ShareGPTPrompterV2( + conversation=conversation, + ), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + return strategy + + +class InstructShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy): + """ + basic sharegpt strategy to grab conversations from the sample row + """ + + def get_conversation_thread(self, prompt): + return [ + {"from": "human", "value": prompt["instruction"]}, + {"from": "gpt", "value": prompt["output"]}, + ] diff --git a/src/axolotl/prompt_strategies/llama2_chat.py b/src/axolotl/prompt_strategies/llama2_chat.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f5ffefff3f941694bdb2ba7a9f3bbd9118b748 --- /dev/null +++ b/src/axolotl/prompt_strategies/llama2_chat.py @@ -0,0 +1,206 @@ +""" +Prompt Strategy for finetuning Llama2 chat models +see also https://github.com/facebookresearch/llama/blob/6c7fe276574e78057f917549435a2554000a876d/llama/generation.py#L213 for a reference implementation. + +This implementation is based on the Vicuna PR and the fastchat repo, see also: +https://github.com/lm-sys/FastChat/blob/cdd7730686cb1bf9ae2b768ee171bdf7d1ff04f3/fastchat/conversation.py#L847 + +Use dataset type: "llama2_chat" in config.yml to use this prompt style. + +E.g. in the config.yml: +``` +datasets: + - path: llama_finetune_train.jsonl + type: llama2_chat +``` + +The dataset itself should look like this: +``` +{'conversations':[{"from": "human", "value": "Who are you?"}, {"from": "gpt", "value": "I am Vicuna"},...]} +``` +in a jsonl file. The first message should be from the human, the second from gpt. +For a custom system message, the first "from" can be "system" (followed by alternating "human" and "gpt" turns). + +Important: Don't use "special_tokens:" in your config.yml if you are not sure what you are doing! +""" + +import logging +from dataclasses import dataclass, field +from typing import Generator, List, Sequence + +from axolotl.prompt_tokenizers import PromptTokenizingStrategy +from axolotl.prompters import IGNORE_TOKEN_ID, SHAREGPT_ASSERTION_FAILED_ROLE + + +@dataclass +class Llama2ChatConversation: + """A class that manages prompt templates and keeps all conversation history. + copied from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py""" + + name: str = "llama2" + # The system prompt + system: str = ( + "[INST] <<SYS>>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " + "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. 
" + "Please ensure that your responses are socially unbiased and positive in nature.\n\n" + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + "If you don't know the answer to a question, please don't share false information.\n<>\n\n" + ) + roles: Sequence[str] = ("[INST]", "[/INST]") + messages: List[List[str]] = field(default_factory=list) + offset: int = 0 + sep = " " + sep2 = " " + stop_token_ids = [2] + + def get_prompt(self) -> str: + """Get the prompt for generation.""" + seps = [self.sep, self.sep2] + ret = "" + for i, (role, message) in enumerate(self.messages): + if (i == len(self.messages) - 1) and (role == self.roles[0]): + # last message is from user (due to length), + # return prompt without it for training + return ret + if i == 0: + ret += self.system + message.strip() + else: + ret += role + " " + message.strip() + seps[i % 2] + return ret + + def append_message(self, role: str, message: str): + """Append a new message.""" + self.messages.append([role, message]) + + +class LLama2ChatTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for ShareGPT prompts. + adapted from https://github.com/lm-sys/FastChat/blob/main/fastchat/train/train.py + """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.tokenizer.add_special_tokens( + {"pad_token": getattr(self.tokenizer, "pad_token", "")} + ) + # https://huggingface.co/meta-llama/Llama-2-7b-chat-hf/blob/main/added_tokens.json + + def tokenize_prompt(self, prompt): + conv = next(self.prompter.build_prompt(prompt)) + conversation_str = conv.get_prompt() + + # Tokenize conversations + input_ids = self.tokenizer( + conversation_str, + return_tensors="pt", + padding="max_length", + max_length=self.sequence_len, + truncation=True, + ).input_ids[0] + target = input_ids.clone() + + # Mask targets. Only compute loss on the assistant outputs. + sep = conv.roles[1] + + total_len = int(target.ne(self.tokenizer.pad_token_id).sum()) + + turns = conversation_str.split(conv.sep2) + cur_len = 1 + target[:cur_len] = IGNORE_TOKEN_ID + for turn in turns: + if turn == "": + break + turn_len = len(self.tokenizer(turn).input_ids) + + parts = turn.split(sep) + if len(parts) != 2: + break + parts[0] += sep + # "-1" is hardcoded for the LLaMA tokenizer to make the offset correct. + instruction_len = len(self.tokenizer(parts[0]).input_ids) - 1 + + # Ignore the user instructions + target[cur_len - 1 : cur_len + instruction_len] = IGNORE_TOKEN_ID + cur_len += turn_len + 2 # due to length of role token + + target[cur_len:] = IGNORE_TOKEN_ID + + if cur_len < self.sequence_len: + if cur_len != total_len: + target[:] = IGNORE_TOKEN_ID + logging.warning( + f"WARNING: tokenization mismatch: {cur_len} vs. {total_len}." + f" (ignored)" + ) + + attention_mask = input_ids.ne(self.tokenizer.pad_token_id).tolist() + input_ids = input_ids.tolist() + target = target.tolist() + # this is a fix for the tokenizer which tokenizes [ differently with eos tokens and + # follows the original llama implementation + for i in range(2, total_len - 2): + if input_ids[i] == 29961: + input_ids[i] = 518 + if target[i] == 29961: + target[i] = 518 + return { + "input_ids": input_ids, + "labels": target, + "attention_mask": attention_mask, + } + + +class Llama2ChatPrompter: # pylint: disable=too-few-public-methods + """ + A prompter that generates prompts for Llama2 models. 
+ """ + + system_prompt = ( + "[INST] <>\nYou are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. " + "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. " + "Please ensure that your responses are socially unbiased and positive in nature.\n\n" + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. " + "If you don't know the answer to a question, please don't share false information.\n<>\n\n" + ) + + def build_prompt(self, source) -> Generator[Llama2ChatConversation, None, None]: + # see https://github.com/lm-sys/FastChat/blob/da0641e567cf93756b0978ab5a6b092e96f06240/fastchat/train/train.py#L78 + source = source["conversations"] # fix data structure for datasets + + # if system prompt provided, use it + if source[0]["from"] == "system": + system = f"[INST] <>\n{source[0]['value']}\n<>\n\n" + source = source[1:] + else: + system = self.system_prompt + + conv = Llama2ChatConversation(system=system) + + if len(source) < 2: + # If there isn't a back and forth conversation, ignore it + # also happens on the data splitting leaving empty conversations + raise IndexError + + roles = {"human": conv.roles[0], "gpt": conv.roles[1]} + + if roles[source[0]["from"]] != conv.roles[0]: + # Skip the first one if it is not from human + source = source[1:] + + conv.messages = [] # pylint: disable=R0801 + for j, sentence in enumerate(source): + role = roles[sentence["from"]] + assert role == conv.roles[j % 2], SHAREGPT_ASSERTION_FAILED_ROLE + if sentence["value"]: + conv.append_message(role, sentence["value"]) + yield conv + + +def load(tokenizer, cfg) -> LLama2ChatTokenizingStrategy: + return LLama2ChatTokenizingStrategy( + Llama2ChatPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/metharme.py b/src/axolotl/prompt_strategies/metharme.py new file mode 100644 index 0000000000000000000000000000000000000000..52d77c00cf9333a7af09c018d0126bd8fbf8a1df --- /dev/null +++ b/src/axolotl/prompt_strategies/metharme.py @@ -0,0 +1,76 @@ +"""Module containing the MetharmenPromptTokenizingStrategy and MetharmePrompter class""" + +import logging +from typing import Tuple + +from axolotl.prompt_tokenizers import InstructionPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter + +LOG = logging.getLogger("axolotl") + +IGNORE_TOKEN_ID = -100 + +# pylint: disable=duplicate-code + + +class MetharmePromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for the Metharme models + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return (prompt["prompt"], "", prompt["generation"]) + + def _tokenize( + self, + prompt: str, + add_eos_token: bool = True, + strip_bos_token: bool = False, + num_eos_tokens: int = 3, + ): + result = self.tokenizer( + prompt, + truncation=True, + max_length=self.sequence_len, + padding=False, + return_tensors=None, + ) + if len(result["input_ids"]) == 0: + LOG.warning("Tokenizer result is empty. 
You may want to audit your dataset") + # If there's already an EOS token there, subtract from the number added + if result["input_ids"][-1] == self.tokenizer.eos_token_id: + num_eos_tokens -= 1 + + if num_eos_tokens > 0 and add_eos_token and len(result["input_ids"]) > 0: + for _ in range(num_eos_tokens): + if len(result["input_ids"]) < self.sequence_len: + result["input_ids"].append(self.tokenizer.eos_token_id) + result["attention_mask"].append(1) + + if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token: + result["input_ids"] = result["input_ids"][1:] + result["attention_mask"] = result["attention_mask"][1:] + + result["labels"] = result["input_ids"].copy() + return result + + +class MetharmePrompter(AlpacaPrompter): + """ + Prompter for the Metharme models. + """ + + system_prompt = "" + system_no_input_prompt = "" + system_format = "" + turn_format = "{instruction}" + turn_no_input_format = "{instruction}" + + def __init__(self, *args, **kwargs): # pylint: disable=super-init-not-called + pass + + +def load(tokenizer, cfg): + return MetharmePromptTokenizingStrategy( + MetharmePrompter(), tokenizer, cfg.train_on_inputs, cfg.sequence_len + ) diff --git a/src/axolotl/prompt_strategies/orcamini.py b/src/axolotl/prompt_strategies/orcamini.py new file mode 100644 index 0000000000000000000000000000000000000000..04ce5767dd75a000007fa01b15744f4bc459595c --- /dev/null +++ b/src/axolotl/prompt_strategies/orcamini.py @@ -0,0 +1,46 @@ +""" +Prompt Strategy for finetuning Orca Mini (v2) models +see also https://huggingface.co/psmathur/orca_mini_v2_7b for more information + +Use dataset type: orcamini in conig.yml to use this prompt style. + +Compared to the alpaca_w_system.open_orca dataset type, +this one specifies the system prompt with "### System:". + +Not suited/tested for multiple-turn conversations without further adjustments. +""" +from typing import Generator, Union + +from axolotl.prompt_strategies.alpaca_w_system import OpenOrcaPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter + + +class OrcaMiniPrompter(AlpacaPrompter): + """Adjusted Prompter for Orca Mini (v2) datasets""" + + def match_prompt_style(self): + self.turn_no_input_format = ( + "### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n" + ) + + def build_prompt_w_system( + self, + system: str, + instruction: str, + output: Union[None, str] = None, + ) -> Generator[str, None, None]: + # returns the full prompt from instruction and optional input + # if a label (=response, =output) is provided, it's also appended. 
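As a quick illustration of the Orca Mini turn template defined above, this snippet renders it for a single instruction. The system and instruction strings are made-up placeholders, and the sketch formats the template string directly rather than going through the prompter class.

```python
# Render the Orca Mini (v2) turn template with placeholder text. When an
# assistant output is present, it is simply appended after "### Response:\n".
template = "### System:\n{system}\n\n### User:\n{instruction}\n\n### Response:\n"
print(
    template.format(
        system="You are a helpful assistant.",
        instruction="Name three prime numbers.",
    )
)
```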
+ res = self.turn_no_input_format.format(system=system, instruction=instruction) + if output: + res = f"{res}{output}" + yield res + + +def load(tokenizer, cfg): + return OpenOrcaPromptTokenizingStrategy( + OrcaMiniPrompter(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) diff --git a/src/axolotl/prompt_strategies/orpo/__init__.py b/src/axolotl/prompt_strategies/orpo/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3a961fcc92a5022e96f528d53aeee7407c4ff83b --- /dev/null +++ b/src/axolotl/prompt_strategies/orpo/__init__.py @@ -0,0 +1,9 @@ +""" +module for ORPO style dataset transform strategies +""" + +from functools import partial + +from ..base import load as load_base + +load = partial(load_base, module="axolotl.prompt_strategies.orpo") diff --git a/src/axolotl/prompt_strategies/orpo/chat_template.py b/src/axolotl/prompt_strategies/orpo/chat_template.py new file mode 100644 index 0000000000000000000000000000000000000000..9953fe87e8972ee9d3a9637b5fdee059248a3ce8 --- /dev/null +++ b/src/axolotl/prompt_strategies/orpo/chat_template.py @@ -0,0 +1,188 @@ +"""chatml prompt tokenization strategy for ORPO""" +from typing import Any, Dict, Generator, List, Optional, Tuple + +from pydantic import BaseModel + +from axolotl.prompt_tokenizers import IGNORE_INDEX, PromptTokenizingStrategy +from axolotl.prompters import Prompter +from axolotl.utils.chat_templates import chat_templates + + +class Message(BaseModel): + """message/turn""" + + role: str + content: str + label: Optional[bool] = None + + +class MessageList(BaseModel): + """conversation""" + + messages: List[Message] + + +def load( + tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None, **kwargs +): # pylint: disable=possibly-unused-variable,unused-argument + """ + chatml transforms for datasets with system, input, chosen, rejected + """ + + chat_template = chat_templates("chatml") + if ds_cfg and "chat_template" in ds_cfg: + chat_template = ds_cfg["chat_template"] + try: + chat_template = chat_templates(chat_template) + except ValueError: + pass + tokenizer.chat_template = chat_template + + return ORPOTokenizingStrategy( + ORPOPrompter(chat_template, tokenizer), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + dataset_parser=ORPODatasetParsingStrategy(), + ) + + +class ORPODatasetParsingStrategy: + """Strategy to parse chosen rejected dataset into messagelist""" + + def get_chosen_conversation_thread(self, prompt) -> MessageList: + """Dataset structure mappings""" + + messages: List[Message] = [] + if system := prompt.get("system", None): + messages.append(Message(role="system", content=system, label=False)) + messages.append(Message(role="user", content=prompt["prompt"], label=False)) + messages.append( + Message( + role="assistant", content=prompt["chosen"][1]["content"], label=True + ) + ) + return MessageList(messages=messages) + + def get_rejected_conversation_thread(self, prompt) -> MessageList: + """Dataset structure mappings""" + + messages: List[Message] = [] + if system := prompt.get("system", None): + messages.append(Message(role="system", content=system, label=False)) + messages.append(Message(role="user", content=prompt["prompt"], label=False)) + messages.append( + Message( + role="assistant", content=prompt["rejected"][1]["content"], label=True + ) + ) + return MessageList(messages=messages) + + +class ORPOTokenizingStrategy(PromptTokenizingStrategy): + """ + rejected_input_ids + input_ids + rejected_attention_mask + attention_mask + rejected_labels + labels + """ + + def 
__init__( + self, + *args, + dataset_parser=None, + **kwargs, + ): + super().__init__(*args, **kwargs) + self.dataset_parser = dataset_parser + + def tokenize_prompt(self, prompt): + # pass the rejected prompt/row to the Prompter to get the formatted prompt + prompt_len = 0 + rejected_message_list = self.dataset_parser.get_rejected_conversation_thread( + prompt + ) + input_ids = [] + labels = [] + for _, (part, label) in enumerate( + self.prompter.build_prompt(rejected_message_list) + ): + if not part: + continue + _input_ids = self.tokenizer.encode(part, add_special_tokens=False) + prev_idx = len(input_ids) + input_ids += _input_ids[prev_idx:] + if label: + labels += input_ids[prev_idx:] + else: + labels += [IGNORE_INDEX] * (len(input_ids) - prev_idx) + prompt_len = len(input_ids) + # remap the input_ids, attention_mask and labels + rejected_input_ids = input_ids + rejected_labels = labels + # pass the chosen prompt/row to the Prompter to get the formatted prompt + chosen_message_list = self.dataset_parser.get_chosen_conversation_thread(prompt) + input_ids = [] + labels = [] + for _, (part, label) in enumerate( + self.prompter.build_prompt(chosen_message_list) + ): + if not part: + continue + _input_ids = self.tokenizer.encode(part, add_special_tokens=False) + prev_idx = len(input_ids) + input_ids += _input_ids[prev_idx:] + if label: + labels += input_ids[prev_idx:] + else: + labels += [IGNORE_INDEX] * (len(input_ids) - prev_idx) + + return { + "rejected_input_ids": rejected_input_ids, + "rejected_labels": rejected_labels, + "rejected_attention_mask": [1] * len(rejected_labels), + "input_ids": input_ids, + "labels": labels, + "attention_mask": [1] * len(labels), + "prompt_attention_mask": [1] * prompt_len + + [0] * (len(labels) - prompt_len), + } + + +class ORPOPrompter(Prompter): + """Single Turn prompter for ORPO""" + + def __init__(self, chat_template, tokenizer): + self.chat_template = chat_template + self.tokenizer = tokenizer + + def build_prompt( + self, + message_list: MessageList, + ) -> Generator[Tuple[str, bool], None, None]: + conversation = [] + for message in message_list.messages: + conversation.append(message.model_dump()) + if message.role == "system": + yield self.tokenizer.apply_chat_template( + conversation, + add_generation_prompt=False, + chat_template=self.chat_template, + tokenize=False, + ), False + if message.role == "user": + yield self.tokenizer.apply_chat_template( + conversation, + add_generation_prompt=True, + chat_template=self.chat_template, + tokenize=False, + ), False + if message.role == "assistant": + yield self.tokenizer.apply_chat_template( + conversation, + add_generation_prompt=False, + chat_template=self.chat_template, + tokenize=False, + ), True diff --git a/src/axolotl/prompt_strategies/pretrain.py b/src/axolotl/prompt_strategies/pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..913da3b34af95b52a159b4b4f36c8adb9c42a317 --- /dev/null +++ b/src/axolotl/prompt_strategies/pretrain.py @@ -0,0 +1,58 @@ +"""pretraining prompt strategies""" +from typing import Generator + +from transformers import BatchEncoding + +from axolotl.prompt_tokenizers import PromptTokenizingStrategy + + +class PretrainTokenizer: + """basic tokenization class for pretraining""" + + def build_prompt(self, prompt) -> Generator[str, None, None]: + yield prompt + + +class PretrainTokenizationStrategy(PromptTokenizingStrategy): + """handles tokenization for pretraining with strides""" + + @property + def supports_batched(self): + return True + + def 
__init__(self, *args, max_length=None, **kwargs): + super().__init__(*args, **kwargs) + if max_length: + self.max_length = max_length + + def _tokenize( + self, prompt: str, add_eos_token: bool = True, strip_bos_token: bool = False + ) -> BatchEncoding: + res = self.tokenizer( + prompt, + truncation=True, + max_length=self.max_length - 1, + add_special_tokens=True, + return_overflowing_tokens=True, + stride=256, + ) + res["input_ids"] = [ + seq + [self.tokenizer.eos_token_id] for seq in res["input_ids"] + ] + res["attention_mask"] = [seq + [1] for seq in res["attention_mask"]] + + return res + + def tokenize_prompt(self, prompt): + return self._tokenize(prompt["text"]) + + +def load(tokenizer, cfg): + strat = PretrainTokenizationStrategy( + PretrainTokenizer(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + max_length=cfg.sequence_len * 64, + ) + return strat diff --git a/src/axolotl/prompt_strategies/pygmalion.py b/src/axolotl/prompt_strategies/pygmalion.py new file mode 100644 index 0000000000000000000000000000000000000000..88208f6ec4329eb550344af9048d8d61d0d4d7e9 --- /dev/null +++ b/src/axolotl/prompt_strategies/pygmalion.py @@ -0,0 +1,101 @@ +"""Module containing the PygmalionPromptTokenizingStrategy and PygmalionPrompter class""" + +import copy +import logging +from collections import defaultdict +from typing import Generator, List, Tuple + +from axolotl.prompt_tokenizers import ( + PromptTokenizingStrategy, + parse_tokenized_to_result, + tokenize_prompt_default, +) + +LOG = logging.getLogger("axolotl") + +IGNORE_TOKEN_ID = -100 + + +class PygmalionPromptTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for Pygmalion. + """ + + bot_prefix_token_ids: List[int] = [] + + def __init__(self, prompter, tokenizer, *args, **kwargs): + super().__init__(prompter, tokenizer, *args, **kwargs) + res = self._tokenize("<|model|>", add_eos_token=False, strip_bos_token=True) + self.bot_prefix_token_ids = res["input_ids"] + + def tokenize_prompt(self, prompt): + result, current_len = tokenize_prompt_default() + for _, part in enumerate(self.prompter.build_prompt(prompt["conversations"])): + role, message = part + if role == "system": + prefix = "<|system|>" + # this should include a bos token, no eos token, strip trailing "\n" + if message.endswith("\n"): + message = message[:-8] + res = self._tokenize( + prefix + "Persona: " + message.strip(), + add_eos_token=False, + strip_bos_token=False, + ) + # everything from this is masked out from the labels + labels = [IGNORE_TOKEN_ID] * len(res["input_ids"]) + elif role == "human": + prefix = "<|user|>" + res = self._tokenize( + prefix + " " + message.strip(), + add_eos_token=False, + strip_bos_token=True, + ) + # everything from this is masked out from the labels + labels = [IGNORE_TOKEN_ID] * len(res["input_ids"]) + elif role == "bot": + prefix = "<|model|>" + res = self._tokenize( + prefix + " " + message.strip(), + add_eos_token=True, + strip_bos_token=True, + ) + # mask out the prefix token, rest is not masked out from labels + # make sure we create the labels first, otherwise we get incorrect lengths + labels = [IGNORE_TOKEN_ID] * len(self.bot_prefix_token_ids) + [ + *copy.deepcopy(res["input_ids"]) + ][len(self.bot_prefix_token_ids) :] + else: + LOG.warning(f"unknown role in conversation: {role}") + res = defaultdict(lambda: []) + + # pylint: disable=duplicate-code + result, current_len = parse_tokenized_to_result( + result, + current_len, + res, + labels, + pad_token_id=self.tokenizer.pad_token_id, + ) + return 
result + + +class PygmalionPrompter: + """ + Prompter for Pygmalion. + """ + + def __init__(self, *args, **kwargs): + pass + + def build_prompt( + self, source, *args, **kwargs # pylint: disable=unused-argument + ) -> Generator[Tuple[str, str], None, None]: + for msg in source: + yield msg["role"], msg["value"] + + +def load(tokenizer, cfg): + return PygmalionPromptTokenizingStrategy( + PygmalionPrompter(), tokenizer, cfg.train_on_inputs, cfg.sequence_len + ) diff --git a/src/axolotl/prompt_strategies/sharegpt.py b/src/axolotl/prompt_strategies/sharegpt.py new file mode 100644 index 0000000000000000000000000000000000000000..55bdd37b4f441e51a9b62f31d7fbcc5688d21838 --- /dev/null +++ b/src/axolotl/prompt_strategies/sharegpt.py @@ -0,0 +1,211 @@ +"""Module containing the SimpleShareGPTPromptTokenizingStrategy class""" + +import logging +from typing import Any, Dict, Optional + +from fastchat.conversation import Conversation, SeparatorStyle, register_conv_template + +from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy +from axolotl.prompters import ShareGPTPrompterV2 +from axolotl.utils.tokenization import ( + chatml_to_conversation, + merge_consecutive_messages, +) + +LOG = logging.getLogger("axolotl") + + +def register_chatml_template(system_message=None): + system_message = system_message or "You are a helpful assistant." + register_conv_template( + Conversation( + name="chatml", + system_template="<|im_start|>system\n{system_message}", + system_message=system_message, + roles=["<|im_start|>user", "<|im_start|>assistant"], + sep_style=SeparatorStyle.CHATML, + sep="<|im_end|>", + ) + ) + register_conv_template( + Conversation( + name="chatml_glaive", + system_template="<|im_start|>system\n{system_message}", + system_message=system_message, + roles=["<|im_start|>user", "<|im_start|>assistant", "<|im_start|>tool"], + sep_style=SeparatorStyle.CHATML, + sep="<|im_end|>", + ) + ) + + +def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + conversation = ( + ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None + ) + field_human = ds_cfg["field_human"] if ds_cfg and "field_human" in ds_cfg else None + field_model = ds_cfg["field_model"] if ds_cfg and "field_model" in ds_cfg else None + roles = ds_cfg["roles"].to_dict() if ds_cfg and "roles" in ds_cfg else None + strategy = SimpleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation=conversation, + role_key_model=field_model, + role_key_human=field_human, + roles=roles, + ), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + if ds_cfg and "strict" in ds_cfg: + strategy.strict = ds_cfg["strict"] + return strategy + + +def load_ultrachat(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + conversation = ( + ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None + ) + strategy = UltrachatShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation=conversation, + ), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + if ds_cfg and "strict" in ds_cfg: + strategy.strict = ds_cfg["strict"] + return strategy + + +def load_role(tokenizer, cfg): + return SimpleRoleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_guanaco(tokenizer, cfg): + return GuanacoShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +def load_glaive(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None): + 
conversation = ( + ds_cfg["conversation"] + if ds_cfg and "conversation" in ds_cfg + else "chatml_glaive" + ) + return GlaiveShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2(conversation=conversation), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +class SimpleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy): + """ + basic sharegpt strategy to grab conversations from the sample row + """ + + _strict = False + + @property + def strict(self): + return self._strict + + @strict.setter + def strict(self, strict): + self._strict = strict + + def get_conversation_thread(self, prompt): + conversations = prompt["conversations"] + if self.strict: + return conversations + role_key = "from" + if "role" in conversations[0].keys(): + role_key = "role" + value_key = "value" + if "text" in conversations[0].keys(): + value_key = "text" + elif "content" in conversations[0].keys(): + value_key = "content" + # remap roles - allow for assistant turn" + role_map = { + "user": "human", + "human": "human", + "assistant": "gpt", + "gpt": "gpt", + "system": "system", + } + turns = [ + { + "from": ( + role_map[t[role_key]] if t[role_key] in role_map else t[role_key] + ), + "value": t[value_key], + } + for t in conversations + ] + return turns + + +class SimpleRoleShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy): + """ + basic sharegpt strategy to grab conversations from the sample row, but uses role instead of from + """ + + def get_conversation_thread(self, prompt): + conversations = prompt["conversations"] + # remap role: prompter/assistant, text: ... => from: human/gpt, value: ... + turns = [{"from": t["role"], "value": t["value"]} for t in conversations] + return turns + + +class GuanacoShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy): + """ + sharegpt strategy that remaps oasst data to sharegpt format + """ + + def get_conversation_thread(self, prompt): + conversations = prompt["conversations"] + # remap role: prompter/assistant, text: ... => from: human/gpt, value: ... 
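The key remapping performed by `SimpleShareGPTPromptTokenizingStrategy.get_conversation_thread` above can be pictured with a small standalone sketch. The sample row is invented, and the snippet reproduces only the mapping logic, not the full strategy.

```python
# Invented ShareGPT-style row that uses "role"/"content" keys instead of "from"/"value".
row = {
    "conversations": [
        {"role": "user", "content": "Hi there."},
        {"role": "assistant", "content": "Hello! How can I help?"},
    ]
}

# Same remap table as in get_conversation_thread above.
role_map = {
    "user": "human",
    "human": "human",
    "assistant": "gpt",
    "gpt": "gpt",
    "system": "system",
}

turns = [
    {"from": role_map.get(t["role"], t["role"]), "value": t["content"]}
    for t in row["conversations"]
]
print(turns)
# [{'from': 'human', 'value': 'Hi there.'}, {'from': 'gpt', 'value': 'Hello! How can I help?'}]
```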
+ role_map = {"prompter": "human", "assistant": "gpt"} + turns = [ + {"from": role_map[t["role"]], "value": t["text"]} for t in conversations + ] + return turns + + +class UltrachatShareGPTPromptTokenizingStrategy(SimpleShareGPTPromptTokenizingStrategy): + """ + sharegpt strategy that remaps ultrachat data to sharegpt format + """ + + def get_conversation_thread(self, prompt): + conversations = prompt["messages"] + role_map = {"user": "human", "assistant": "gpt"} + turns = [ + {"from": role_map[t["role"]], "value": t["content"]} for t in conversations + ] + return turns + + +class GlaiveShareGPTPromptTokenizingStrategy(SimpleShareGPTPromptTokenizingStrategy): + """ + sharegpt strategy that remaps glaive data to sharegpt format + """ + + def get_conversation_thread(self, prompt): + conversation = chatml_to_conversation(prompt) + conversation = merge_consecutive_messages(conversation) + + return conversation diff --git a/src/axolotl/prompt_strategies/sharegpt_jokes.py b/src/axolotl/prompt_strategies/sharegpt_jokes.py new file mode 100644 index 0000000000000000000000000000000000000000..404302c81ea643aa841b6d1639ce6f95090ebfc9 --- /dev/null +++ b/src/axolotl/prompt_strategies/sharegpt_jokes.py @@ -0,0 +1,28 @@ +"""Module for Jokes prompts using sharegpt style """ +from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy +from axolotl.prompters import ShareGPTPrompterV2 + + +def load(tokenizer, cfg): + return SimpleJokesShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2(), + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + +class SimpleJokesShareGPTPromptTokenizingStrategy(ShareGPTPromptTokenizingStrategy): + """ + Tokenization strategy for asking bot to tell a joke and then explain why its funny + """ + + # title, text, explanation + def get_conversation_thread(self, prompt): + title = "" if not prompt["title"] else prompt["title"] + " " + return [ + {"from": "human", "value": "Tell me a joke."}, + {"from": "gpt", "value": title + prompt["text"]}, + {"from": "human", "value": "Why is that joke funny?"}, + {"from": "gpt", "value": prompt["explanation"]}, + ] diff --git a/src/axolotl/prompt_strategies/user_defined.py b/src/axolotl/prompt_strategies/user_defined.py new file mode 100644 index 0000000000000000000000000000000000000000..e20e80c3a451fda218686128e3a32fc2a8ade369 --- /dev/null +++ b/src/axolotl/prompt_strategies/user_defined.py @@ -0,0 +1,98 @@ +""" +User Defined prompts with configuration from the YML config +""" + +from dataclasses import dataclass +from functools import partial +from typing import Optional, Tuple + +from axolotl.prompt_strategies.alpaca_w_system import ( + InstructionWSystemPromptTokenizingStrategy, + SystemDataPrompter, +) + + +@dataclass +class UserDefinedDatasetConfig: + """ + dataclass configuration representing a userdefined dataset type + """ + + system_prompt: str = "" + field_system: str = "system" + field_instruction: str = "instruction" + field_input: str = "input" + field_output: str = "output" + format: str = "{instruction} {input} " + no_input_format: str = "{instruction} " + system_format: str = "{system}" + + def __getitem__(self, item): + return getattr(self, item) + + +class UserDefinedPromptTokenizationStrategy(InstructionWSystemPromptTokenizingStrategy): + """ + Prompt Tokenization Strategy for user defined prompts + """ + + +def load(tokenizer, cfg, ds_cfg: Optional[UserDefinedDatasetConfig] = None): + if not ds_cfg: + raise ValueError("Missing dataset prompt configuration") + + system_prompt = "" + if 
ds_cfg.system_prompt: + system_prompt = ds_cfg.system_prompt + + def parse_instruction_fields( + field_instruction, + field_input, + field_output, + field_system, + system_prompt, + prompt, + ) -> Tuple[str, str, str, str]: + return ( + prompt[field_instruction], + prompt[field_input] if field_input in prompt else "", + prompt[field_output] if field_output in prompt else "", + prompt[field_system] if field_system in prompt else system_prompt, + ) + + turn_format = ds_cfg.format + turn_no_input_format = ds_cfg.no_input_format + system_format = ds_cfg.system_format + + class UserDefinedPrompter(SystemDataPrompter): + """ + Prompter for user defined prompts + """ + + def match_prompt_style(self): + self.turn_format = turn_format + self.turn_no_input_format = turn_no_input_format + self.system_format = system_format + + prompter = UserDefinedPrompter() + + strat = UserDefinedPromptTokenizationStrategy( + prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + + setattr( + strat, + "parse_instruction_fields", + partial( + parse_instruction_fields, + ds_cfg.field_instruction, + ds_cfg.field_input, + ds_cfg.field_output, + ds_cfg.field_system, + system_prompt, + ), + ) + return strat diff --git a/src/axolotl/prompt_tokenizers.py b/src/axolotl/prompt_tokenizers.py new file mode 100644 index 0000000000000000000000000000000000000000..bb13cf76dda7a10526ea5f1e57f5491c26087ff0 --- /dev/null +++ b/src/axolotl/prompt_tokenizers.py @@ -0,0 +1,501 @@ +"""Module containing PromptTokenizingStrategy and Prompter classes""" + +import abc +import copy +import logging +from typing import Dict, List, Tuple, Union + +from fastchat.conversation import Conversation +from transformers import BatchEncoding, PreTrainedTokenizer + +from axolotl.monkeypatch.fastchat_conversation_turns import ( + add_get_turns_to_conversation, +) +from axolotl.prompters import IGNORE_TOKEN_ID, Prompter + +LOG = logging.getLogger("axolotl") + +IGNORE_INDEX = -100 +LLAMA_DEFAULT_PAD_TOKEN = "" # nosec +LLAMA_DEFAULT_EOS_TOKEN = "" # nosec +LLAMA_DEFAULT_BOS_TOKEN = "" # nosec +LLAMA_DEFAULT_UNK_TOKEN = "" # nosec + +add_get_turns_to_conversation() + + +class InvalidDataException(Exception): + """ + Exception raised when the data is invalid + """ + + +class PromptTokenizingStrategy(abc.ABC): + """ + Abstract class for tokenizing strategies + """ + + def __init__( + self, + prompter: Prompter, + tokenizer, + train_on_inputs: bool = False, + sequence_len: int = 2048, + ): + self.prompter = prompter + self.tokenizer: PreTrainedTokenizer = tokenizer + self.train_on_inputs = train_on_inputs + # sequence_len and max_length can be different for CompletionPromptTokenizingStrategy. + # TODO: Document how they are different. + self.sequence_len = sequence_len + self.max_length = sequence_len + + @abc.abstractmethod + def tokenize_prompt(self, prompt): + pass + + @property + def supports_batched(self): + return False + + def _tokenize( + self, prompt: str, add_eos_token: bool = True, strip_bos_token: bool = False + ) -> BatchEncoding: + empty = BatchEncoding(data={"input_ids": [], "attention_mask": []}) + if not prompt: + LOG.warning("Empty text requested for tokenization.") + return empty + + result = self.tokenizer( + prompt, + truncation=True, + max_length=self.max_length, + padding=False, + return_tensors=None, + ) + if len(result["input_ids"]) == 0: + LOG.warning("Tokenizer result is empty. 
You may want to audit your dataset") + return empty + + if ( + result["input_ids"][-1] != self.tokenizer.eos_token_id + and len(result["input_ids"]) < self.max_length + and add_eos_token + ): + result["input_ids"].append(self.tokenizer.eos_token_id) + result["attention_mask"].append(1) + + if result["input_ids"][0] == self.tokenizer.bos_token_id and strip_bos_token: + result["input_ids"] = result["input_ids"][1:] + result["attention_mask"] = result["attention_mask"][1:] + + result["labels"] = result["input_ids"].copy() + return result + + +class InstructionPromptTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for instruction-based prompts. + """ + + def parse_instruction_fields( + self, prompt + ) -> Union[Tuple[str, str, str], Tuple[str, str, str, str]]: + raise NotImplementedError + + def tokenize_prompt(self, prompt): + ( + instruction, + input, # pylint: disable=redefined-builtin + response, + ) = self.parse_instruction_fields(prompt) + user_prompt = next( + iter( + self.prompter.build_prompt( + instruction, + input, + ) + ) + ) + tokenized_prompt = self._tokenize(user_prompt, add_eos_token=False) + if not self.train_on_inputs: + user_prompt_len = len(tokenized_prompt["input_ids"]) + # TODO this could be sped up using numpy array slicing + tokenized_prompt["labels"] = [IGNORE_INDEX] * user_prompt_len + tokenized_res_prompt = self._tokenize( + response, strip_bos_token=True, add_eos_token=True + ) + tokenized_prompt["input_ids"] += tokenized_res_prompt["input_ids"] + tokenized_prompt["attention_mask"] += tokenized_res_prompt["attention_mask"] + tokenized_prompt["labels"] += tokenized_res_prompt["input_ids"] + + return tokenized_prompt + + def _build_full_prompt( + self, instruction, input, response # pylint: disable=redefined-builtin + ): + return next( + iter( + self.prompter.build_prompt( + instruction, + input, + response, + ) + ) + ) + + +class AlpacaPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Alpaca prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["instruction"], + prompt["input"] if "input" in prompt else "", + prompt["output"], + ) + + +class AlpacaMultipleChoicePromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Alpaca Multiple Choice prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["question"], + "\n".join(f'- "{choice}"' for choice in prompt["choices"]), + prompt["solution"] if "solution" in prompt else prompt["explanation"], + ) + + +class JeopardyPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for Jeopardy prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["question"], + prompt["category"], + "what is " + prompt["answer"], + ) + + +class OpenAssistantPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for OpenAssistant prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["INSTRUCTION"], + "", + prompt["RESPONSE"], + ) + + +class SummarizeTLDRPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for SummarizeTLDR prompts. 
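To make the masking behaviour of `InstructionPromptTokenizingStrategy.tokenize_prompt` above concrete, here is a hand-worked sketch with invented token ids; it is illustrative only and does not call a tokenizer.

```python
# With train_on_inputs=False, prompt tokens are ignored in the loss: their label
# positions are set to IGNORE_INDEX (-100) and only the response ids remain.
IGNORE_INDEX = -100

prompt_ids = [1, 319, 487, 29901]      # invented ids for the user prompt (incl. BOS)
response_ids = [9538, 3043, 29889, 2]  # invented ids for the response (incl. EOS)

input_ids = prompt_ids + response_ids
labels = [IGNORE_INDEX] * len(prompt_ids) + response_ids
attention_mask = [1] * len(input_ids)

assert len(input_ids) == len(labels) == len(attention_mask)
```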
+ """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["article"], + "", + prompt["summary"], + ) + + +class GPTeacherPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for GPTeacher prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["instruction"], + prompt["input"] if "input" in prompt else "", + prompt["response"], + ) + + +class NomicGPT4AllPromptTokenizingStrategy(InstructionPromptTokenizingStrategy): + """ + Tokenizing strategy for NomicGPT4All prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str]: + return ( + prompt["prompt"], + "", + prompt["response"], + ) + + +class ReflectionPromptTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for Reflection prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str, str, str]: + raise NotImplementedError + + def tokenize_prompt(self, prompt): + # pylint: disable=duplicate-code + ( + instruction, + input, # pylint: disable=redefined-builtin + output, + reflection, + corrected, + ) = self.parse_instruction_fields(prompt) + full_prompt = self._build_full_prompt( + instruction, input, output, reflection, corrected + ) + tokenized_full_prompt = self._tokenize(full_prompt) + if not self.train_on_inputs: + user_prompt = next( + iter( + self.prompter.build_prompt( + instruction, + input, + ) + ) + ) + tokenized_user_prompt = self._tokenize(user_prompt, add_eos_token=False) + user_prompt_len = len(tokenized_user_prompt["input_ids"]) + # TODO this could be sped up using numpy array slicing + tokenized_full_prompt["labels"] = [ + IGNORE_INDEX + ] * user_prompt_len + tokenized_full_prompt["labels"][user_prompt_len:] + + return tokenized_full_prompt + + def _build_full_prompt( + self, instruction, input, output, reflection, corrected + ): # pylint: disable=redefined-builtin + return next( + iter( + self.prompter.build_prompt( + instruction, + input, + output, + reflection, + corrected, + ) + ) + ) + + def _tokenize(self, prompt, add_eos_token=True, strip_bos_token=False): + result = self.tokenizer( + prompt, + truncation=True, + max_length=self.sequence_len, + padding=False, + return_tensors=None, + ) + if ( + result["input_ids"][-1] != self.tokenizer.eos_token_id + and len(result["input_ids"]) < self.sequence_len + and add_eos_token + ): + result["input_ids"].append(self.tokenizer.eos_token_id) + result["attention_mask"].append(1) + + result["labels"] = result["input_ids"].copy() + return result + + +class AlpacaReflectionPTStrategy(ReflectionPromptTokenizingStrategy): + """ + Tokenizing strategy for Alpaca Reflection prompts. + """ + + def parse_instruction_fields(self, prompt) -> Tuple[str, str, str, str, str]: + return ( + prompt["instruction"], + prompt["input"] if "input" in prompt else "", + prompt["output"], + prompt["reflection"], + prompt["corrected"], + ) + + +class ShareGPTPromptTokenizingStrategy(PromptTokenizingStrategy): + """ + Tokenizing strategy for ShareGPT prompts. + """ + + def get_conversation_thread(self, prompt): + return prompt["conversations"] + + def tokenize_prompt(self, prompt): + # Initial values. We will append to these as we go through the conversation. 
+ result, current_len = tokenize_prompt_default() + conversation: Conversation = ( + self.prompter._conversation.copy() # pylint: disable=protected-access + ) + + input_roles = {conversation.roles[0]} + output_roles = {conversation.roles[1]} + + if len(conversation.roles) == 3: + tool_role_label = conversation.roles[2] + input_roles.add(tool_role_label) + + # Add roles from the config + if self.prompter.roles: + if "input" in self.prompter.roles and self.prompter.roles["input"]: + for role in self.prompter.roles["input"]: + input_roles.add(role) + + if "output" in self.prompter.roles and self.prompter.roles["output"]: + for role in self.prompter.roles["output"]: + output_roles.add(role) + + # support for custom roles from the dataset, only useful for vicuna style prompts/roles + role_remap = [] + if ( + conversation.name == "vicuna_v1.1" + and "roles" in prompt + and len(prompt["roles"]) >= 2 + ): + role_remap = [ + {"from": conversation.roles[0], "to": prompt["roles"][0]}, + {"from": conversation.roles[1], "to": prompt["roles"][1]}, + ] + + try: + for _, part in enumerate( + self.prompter.build_prompt(self.get_conversation_thread(prompt)) + ): + if not isinstance(part, tuple): + LOG.warning(f"expected tuple, got {part}") + continue + + role, content = part + + # Uses "in" because role contains extra characters + input_turn = any(r.lower() in role.lower() for r in input_roles) + output_turn = any(r.lower() in role.lower() for r in output_roles) + empty_role = role.strip() == "" + + if not any([input_turn, output_turn, empty_role]): + LOG.warning(f"unhandled role: {role}") + continue + + if input_turn: + role = ( + role.replace(role_remap[0]["from"], role_remap[0]["to"]) + if role_remap + else role + ) + turn = role + content + # this is still the user query, we should + if not content.strip(): + LOG.warning(f"user turn has empty text: {prompt}") + res = self._tokenize( + turn, + add_eos_token=False, + strip_bos_token=True, + ) + if self.train_on_inputs: + labels = copy.deepcopy(res["input_ids"]) + else: + # everything from this is masked out from the labels + labels = [IGNORE_TOKEN_ID] * len(res["input_ids"]) + elif output_turn: + role = ( + role.replace(role_remap[1]["from"], role_remap[1]["to"]) + if role_remap + else role + ) + turn = role + content + # this should be the assistant response, should end with an eos token + if not content.strip(): + LOG.warning(f"assistant turn has empty text: {prompt}") + add_eos_token = not ( + conversation.name == "chatml" + and conversation.sep == self.tokenizer.eos_token + ) + res = self._tokenize( + turn, + add_eos_token=add_eos_token, + strip_bos_token=True, + ) + role_res = self._tokenize( + role.rstrip(), + add_eos_token=False, + strip_bos_token=True, + ) + labels = copy.deepcopy(res["input_ids"]) + if not self.train_on_inputs: + # mask out role tokens from the labels + len_role = len(role_res["input_ids"]) + labels[:len_role] = [IGNORE_TOKEN_ID] * min( + len_role, len(labels) + ) + elif empty_role: + turn = content + # this is only ever the first part, should include the bos token and the user query + res = self._tokenize( + turn, add_eos_token=False, strip_bos_token=False + ) + if self.train_on_inputs: + labels = copy.deepcopy(res["input_ids"]) + else: + # everything from this is masked out from the labels + labels = [IGNORE_TOKEN_ID] * len(res["input_ids"]) + + # pylint: disable=duplicate-code + result, current_len = parse_tokenized_to_result( + result, + current_len, + res, + labels, + pad_token_id=self.tokenizer.pad_token_id, + ) + return 
result + except (KeyError, AssertionError, IndexError) as err: + raise InvalidDataException(str(err)) from err + + +def tokenize_prompt_default() -> Tuple[Dict[str, List[int]], int]: + """ + Returns the default values for the tokenize prompt function + """ + + result: Dict[str, List[int]] = { + "input_ids": [], + "attention_mask": [], + "labels": [], + } + current_len = 0 + return result, current_len + + +def parse_tokenized_to_result( + result: Dict[str, List[int]], + current_len: int, + res: Dict[str, List[int]], + labels: List[int], + pad_token_id: Union[int, None] = None, +) -> Tuple[Dict[str, List[int]], int]: + """ + Parses the tokenized prompt and append the tokenized input_ids, attention_mask and labels to the result + """ + + input_ids = res["input_ids"] + input_len = len(input_ids) + result["input_ids"][current_len : current_len + input_len] = input_ids + result["attention_mask"][current_len : current_len + input_len] = [ + 1 if x != pad_token_id else 0 for x in input_ids + ] + result["labels"][current_len : current_len + input_len] = labels + current_len += input_len + + return result, current_len diff --git a/src/axolotl/prompters.py b/src/axolotl/prompters.py new file mode 100644 index 0000000000000000000000000000000000000000..2b6b4f8577d2af0fb9cd861283a35029db6b1258 --- /dev/null +++ b/src/axolotl/prompters.py @@ -0,0 +1,399 @@ +"""Module containing prompters""" + +import logging +from enum import Enum +from typing import Generator, Optional, Union + +from colorama import Fore +from fastchat.conversation import Conversation, get_conv_template + +LOG = logging.getLogger("axolotl") +IGNORE_TOKEN_ID = -100 +REPR_TEMPLATE = "\n\n" + Fore.CYAN + "{full_prompt}" + Fore.RESET + "\n\n" + + +class PromptStyle(Enum): + """ + Enum for prompt styles + """ + + INSTRUCT = "instruct" + CHAT = "chat" + CHATML = "chatml" + + +class Prompter: + """ + Base prompter class for all prompters + """ + + +class AlpacaPrompter(Prompter): + """ + Base class for alpaca prompters + """ + + system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request." + system_no_input_prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request." 
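A short sketch of how the two helpers defined in `prompt_tokenizers.py` above compose pre-tokenized turns into a single example. The token ids are invented, the pad token id is assumed to be 0, and the import assumes the module path introduced in this diff.

```python
# Stitch two pre-tokenized turns together with tokenize_prompt_default and
# parse_tokenized_to_result. Token ids are made up for the sketch.
from axolotl.prompt_tokenizers import parse_tokenized_to_result, tokenize_prompt_default

result, current_len = tokenize_prompt_default()

user_turn = {"input_ids": [5, 6, 7], "attention_mask": [1, 1, 1]}
result, current_len = parse_tokenized_to_result(
    result, current_len, user_turn, labels=[-100, -100, -100], pad_token_id=0
)

assistant_turn = {"input_ids": [8, 9, 2], "attention_mask": [1, 1, 1]}
result, current_len = parse_tokenized_to_result(
    result, current_len, assistant_turn, labels=[8, 9, 2], pad_token_id=0
)

# result["input_ids"] -> [5, 6, 7, 8, 9, 2]
# result["labels"]    -> [-100, -100, -100, 8, 9, 2]  (loss only on the assistant turn)
```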
+ system_format: str = "{system}" + turn_format: str + turn_no_input_format: str + prompt_style: Optional[PromptStyle] = None + + def __init__(self, prompt_style=PromptStyle.INSTRUCT.value): + self.prompt_style = prompt_style if prompt_style else PromptStyle.INSTRUCT.value + self.match_prompt_style() + + def match_prompt_style(self): + # pylint: disable=duplicate-code + if self.prompt_style == PromptStyle.INSTRUCT.value: + self.turn_format = "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n" + self.turn_no_input_format = ( + "### Instruction:\n{instruction}\n\n### Response:\n" + ) + self.system_format = "{system}\n\n" + if self.prompt_style == PromptStyle.CHAT.value: + self.turn_format = "USER: {instruction}\n{input}\nASSISTANT:" + self.turn_no_input_format = "USER: {instruction}\nASSISTANT:" + self.system_format = "SYSTEM: {system}\n" + if self.prompt_style == PromptStyle.CHATML.value: + self.turn_format = "<|im_start|>user\n{instruction}\n{input}<|im_end|>\n<|im_start|>assistant\n" + self.turn_no_input_format = ( + "<|im_start|>user\n{instruction}<|im_end|>\n<|im_start|>assistant\n" + ) + self.system_format = "<|im_start|>system\n{system}<|im_end|>\n" + + def _build_result(self, instruction, input_text, output): + # returns the full prompt from instruction and optional input + # if a label (=response, =output) is provided, it's also appended. + if input_text: + res = ( + self.system_format.format(system=self.system_prompt) + if self.system_prompt + else "" + ) + self.turn_format.format(instruction=instruction, input=input_text) + else: + res = ( + self.system_format.format(system=self.system_no_input_prompt) + if self.system_no_input_prompt + else "" + ) + self.turn_no_input_format.format(instruction=instruction) + if output: + res = f"{res}{output}" + + return res + + def build_prompt( + self, + instruction: str, + input: Union[None, str] = None, # pylint: disable=redefined-builtin + output: Union[None, str] = None, + ) -> Generator[str, None, None]: + yield self._build_result(instruction, input, output) + + def __repr__(self) -> str: + return REPR_TEMPLATE.format( + full_prompt=self._build_result("{instruction}", "{input}", "{output}") + ) + + +class UnpromptedPrompter(AlpacaPrompter): + """ + Prompter for alpaca no system prompt + """ + + system_prompt = "" + system_no_input_prompt = "" + + +class JeopardyPrompter(AlpacaPrompter): + """ + Prompter for Jeopardy + """ + + prompt_input = "Below is a Jeopardy clue paired with input providing the category of the clue. Write a concise response that best answers tbe clue given the category.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n" + + +class MultipleChoiceExplainPrompter(AlpacaPrompter): + """ + Prompter for multiple choice explain + """ + + system_prompt = ( + "Choose the answer that best answers the question. Explain your reasoning.\n" + ) + system_no_input_prompt = ( + "Choose the answer that best answers the question. Explain your reasoning.\n" + ) + + +class MultipleChoiceConcisePrompter(AlpacaPrompter): + """ + Prompter for multiple choice concise + """ + + system_prompt = "Choose the answer that best answers the question. Be concise in your response.\n\n" + system_no_input_prompt = "Choose the answer that best answers the question. 
Be concise in your response.\n\n" + + def match_prompt_style(self): + self.turn_format = "USER: {instruction}\n{input}\nASSISTANT:" + self.turn_no_input_format = "USER: {instruction}\nASSISTANT:" + + +class SummarizeTLDRPrompter(AlpacaPrompter): + """ + Prompter for summarize TLDR + """ + + system_prompt = "" + system_no_input_prompt = "" + + def match_prompt_style(self): + self.turn_format = "USER: Summarize the following article as a TL;DR.\n{instruction}\n{input}\nASSISTANT:" + self.turn_no_input_format = "USER: Summarize the following article as a TL;DR.\n{instruction}\nASSISTANT:" + + +class GPTeacherPrompter(AlpacaPrompter): + """ + Prompter for GPTeacher + """ + + +class NomicGPT4AllPrompter(AlpacaPrompter): + """ + Prompter for NomicGPT4All + """ + + +class ReflectAlpacaPrompter(Prompter): + """ + Prompter for ReflectAlpaca + """ + + system_prompt = "Below is an instruction that describes a task, paired with an input that provides further context. You, the Assistant, should generate a response as if it were an abstract for an academic or technical paper on the query along with a methodology. Then generate an Agent Reflection where you create a long form response as if from subject matter expert, be verbose, diligent, and creative in your application of knowledge, apply it through the lens of the response generated by the assistant. Look for flawed reasoning, faulty logic, or other mistakes in the method. Finally, generate a final response and method for the user with the Assistant abstract and Reflection analysis as augmentations to the generation\n\n" + system_no_input_prompt = "Below is an instruction that describes a task. You, the Assistant, should generate a response as if it were an abstract for an academic or technical paper on the query along with a methodology. Then generate an Agent Reflection where you create a long form response as if from subject matter expert, be verbose, diligent, and creative in your application of knowledge, apply it through the lens of the response generated by the assistant. Look for flawed reasoning, faulty logic, or other mistakes in the method. 
Finally, generate a final response and method for the user with the Assistant abstract and Reflection analysis as augmentations to the generation\n\n" + + prompt_input = ( + "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n" + ) + prompt_no_input = "### Instruction:\n{instruction}\n\n### Response:\n" + agent_label = "### Thought:\n{output}\n\n### Agent Reflection:\n{reflection}\n\n### Final Response:\n{corrected}" + response_split = "### Response:" + + def __init__(self, prompt_style="instruct"): + self.prompt_style = prompt_style + self.match_prompt_style() + + def match_prompt_style(self): + if self.prompt_style == PromptStyle.INSTRUCT.value: + self.prompt_input = ( + self.system_prompt + + "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n" + ) + self.prompt_no_input = ( + self.system_no_input_prompt + + "### Instruction:\n{instruction}\n\n### Response:\n" + ) + self.agent_label = "### Thought:\n{output}\n\n### Agent Reflection:\n{reflection}\n\n### Final Response:\n{corrected}" + self.response_split = "### Final Response:" + if self.prompt_style == PromptStyle.CHAT.value: + self.prompt_input = ( + self.system_prompt + "USER: {instruction}\n{input}\nASSISTANT:" + ) + self.prompt_no_input = ( + self.system_no_input_prompt + "USER: {instruction}\nASSISTANT:" + ) + self.agent_label = ( + "\nTHOUGHT: {output}\nASSISTANT REFLECTION: {reflection}\nASSISTANT:" + ) + self.response_split = "ASSISTANT:" + + def _build_result( + self, + instruction: str, + input: Union[None, str] = None, # pylint: disable=redefined-builtin + output: Union[None, str] = None, + reflection: Union[None, str] = None, + corrected: Union[None, str] = None, + ): + # returns the full prompt from instruction and optional input + # if a label (=response, =output) is provided, it's also appended. + if input: + res = self.prompt_input.format(instruction=instruction, input=input) + else: + res = self.prompt_no_input.format(instruction=instruction) + if output and reflection and corrected: + label = self.agent_label.format( + output=output, + reflection=reflection, + corrected=corrected, + ) + res = f"{res}{label}" + + return res + + def build_prompt( + self, + instruction: str, + input: Union[None, str] = None, # pylint: disable=redefined-builtin + output: Union[None, str] = None, + reflection: Union[None, str] = None, + corrected: Union[None, str] = None, + ) -> Generator[str, None, None]: + # pylint: disable=duplicate-code + yield self._build_result( + instruction, + input, + output, + reflection, + corrected, + ) + + def __repr__(self) -> str: + return REPR_TEMPLATE.format( + full_prompt=self._build_result("{instruction}", "{input}", "{output}") + ) + + +SHAREGPT_ASSERTION_FAILED_ROLE = ( + "Role did not alternate between turns (gpt and human). Please check your data." +) + +CONVERSATION_ROLE_FORMAT = { + "chatml": "<|im_start|>{ROLE}", + "zephyr": "<|{ROLE}|>", + "vicuna_v1.1": "{ROLE}", +} + + +class ShareGPTPrompter(Prompter): # pylint: disable=too-few-public-methods + """ + A prompter that generates prompts for the ShareGPT + """ + + role_key_human = "human" + role_key_model = "gpt" + # Optional, only used for tool usage datasets. 
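As a tiny illustration of the `CONVERSATION_ROLE_FORMAT` fallback above, which is used when a dataset role is not one of the conversation template's default roles, the role name below is a made-up example and the import assumes the module path from this diff.

```python
# Format an out-of-template role for two of the supported conversation styles.
from axolotl.prompters import CONVERSATION_ROLE_FORMAT

print(CONVERSATION_ROLE_FORMAT["chatml"].format(ROLE="tool"))  # <|im_start|>tool
print(CONVERSATION_ROLE_FORMAT["zephyr"].format(ROLE="tool"))  # <|tool|>
```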
+ role_key_tool: Optional[str] = None + # Optional, role input/output mapping + roles: Optional[dict] = None + + def __init__( + self, + prompt_style=None, # pylint: disable=unused-argument + conversation: Optional[Union[str, Conversation]] = None, + role_key_human: Optional[str] = None, + role_key_model: Optional[str] = None, + role_key_tool: Optional[str] = None, + roles: Optional[dict] = None, + ): + if conversation: + if isinstance(conversation, Conversation): + self._conversation = conversation + else: + self._conversation = get_conv_template(conversation) + else: + self._conversation = get_conv_template("vicuna_v1.1") + if role_key_human: + self.role_key_human = role_key_human + if role_key_model: + self.role_key_model = role_key_model + if role_key_tool: + self.role_key_tool = role_key_tool + if roles: + self.roles = roles + + def _build_result(self, source): + if len(source) < 2: + # If there isn't a back and forth conversation, ignore it + # also happens on the data splitting leaving empty conversations + raise IndexError( + f"A conversation entry has less than 2 messages :\n{source}" + ) + + conv = self._conversation.copy() + + # Add the conversation system prompt if provided, otherwise use the default one + if source[0]["from"] == "system": + conv.set_system_message(source[0]["value"]) + source.pop(0) + + roles = {self.role_key_human: conv.roles[0], self.role_key_model: conv.roles[1]} + if self.role_key_tool: + roles[self.role_key_tool] = conv.roles[2] + + try: + # Apply prompt templates + if source[0]["from"] not in roles: + # Skip the first one if it is not from human + source = source[1:] + except IndexError as err: + # sometimes there is a bing or system chat + raise err + + conv.messages = [] + for _, sentence in enumerate(source): + from_role = sentence["from"] + if from_role in roles: + role = roles[from_role] + else: + if self._conversation.name not in CONVERSATION_ROLE_FORMAT: + raise NotImplementedError( + f"Role ({role}) not in default roles, and {self._conversation.name} does not support role remapping yet." + "Please help us by creating an Issue to add support for this conversation type." 
+ ) + + role = CONVERSATION_ROLE_FORMAT[self._conversation.name].format( + ROLE=from_role + ) + + if len(conv.messages) > 0 and ((role == conv.messages[-1][0])): + LOG.warning(f"{SHAREGPT_ASSERTION_FAILED_ROLE}: {sentence}") + + conv.append_message(role, sentence["value"]) + + return conv.get_turns() + + def build_prompt(self, source) -> Generator[str, None, None]: + turns = self._build_result(source) + + for part in turns: + if part[0] and not part[1]: + LOG.warning(f"role with empty message: {part[0]}") + yield part + + def __repr__(self) -> str: + turns = self._build_result([{"from": "{from}", "value": "{value}"}]) + return "\n".join([REPR_TEMPLATE.format(full_prompt=part) for part in turns]) + + +class ShareGPTPrompterV2(ShareGPTPrompter): + """ + A V2 prompter that generates prompts for the ShareGPT + """ + + def __init__( + self, + conversation: Optional[Union[str, Conversation]] = None, + role_key_human: Optional[str] = None, + role_key_model: Optional[str] = None, + roles: Optional[dict] = None, + ): + super().__init__( + conversation=conversation, + role_key_human=role_key_human, + role_key_model=role_key_model, + roles=roles, + ) + + +class UnsupportedPrompter(Prompter): + """ + A dummy class for custom prompters + """ + + def __init__(self) -> None: + pass + + def __repr__(self): + return "Pre-tokenized or custom dataset types are unsupported for logging" diff --git a/src/axolotl/train.py b/src/axolotl/train.py new file mode 100644 index 0000000000000000000000000000000000000000..b6cd24672e836e8d3ab9ad67bc3136196da6ea0d --- /dev/null +++ b/src/axolotl/train.py @@ -0,0 +1,234 @@ +"""Prepare and train a model on a dataset. Can also infer from a model or merge lora""" + +import os +import signal +import sys +from dataclasses import dataclass +from pathlib import Path +from typing import Optional, Tuple, Union + +import torch +import transformers.modelcard +from accelerate.logging import get_logger +from datasets import Dataset +from peft import PeftModel +from pkg_resources import get_distribution # type: ignore +from transformers import PreTrainedModel, PreTrainedTokenizer +from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled + +from axolotl.common.cli import TrainerCliArgs +from axolotl.logging_config import configure_logging +from axolotl.utils.dict import DictDefault +from axolotl.utils.freeze import freeze_layers_except +from axolotl.utils.models import load_model, load_tokenizer +from axolotl.utils.trainer import setup_trainer + +try: + from optimum.bettertransformer import BetterTransformer +except ImportError: + BetterTransformer = None + +project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +src_dir = os.path.join(project_root, "src") +sys.path.insert(0, src_dir) + +configure_logging() +LOG = get_logger("axolotl.train") + + +@dataclass +class TrainDatasetMeta: + """ + dataclass to capture the dataset specific options for training + """ + + train_dataset: Dataset + eval_dataset: Optional[Dataset] = None + total_num_steps: Optional[int] = None + + +def train( + *, cfg: DictDefault, cli_args: TrainerCliArgs, dataset_meta: TrainDatasetMeta +) -> Tuple[Union[PeftModel, PreTrainedModel], PreTrainedTokenizer]: + # load the tokenizer first + LOG.debug( + f"loading tokenizer... 
{cfg.tokenizer_config or cfg.base_model_config}", + main_process_only=True, + ) + tokenizer = load_tokenizer(cfg) + + train_dataset = dataset_meta.train_dataset + eval_dataset = dataset_meta.eval_dataset + total_num_steps = dataset_meta.total_num_steps + + if cfg.resume_from_checkpoint is None and cfg.auto_resume_from_checkpoints: + possible_checkpoints = [ + str(cp) for cp in Path(cfg.output_dir).glob("checkpoint-*") + ] + if len(possible_checkpoints) > 0: + sorted_paths = sorted( + possible_checkpoints, + key=lambda path: int(path.split("-")[-1]), + ) + cfg.resume_from_checkpoint = sorted_paths[-1] + LOG.info( + f"Using Auto-resume functionality to start with checkpoint at {cfg.resume_from_checkpoint}" + ) + resume_from_checkpoint = cfg.resume_from_checkpoint + + # Load the model and tokenizer + msg = "loading model" + if cfg.adapter: + msg += " and peft_config..." + LOG.debug(msg) + model, peft_config = load_model(cfg, tokenizer, inference=cli_args.inference) + model.generation_config.do_sample = True + + model_ref = None + if cfg.rl and cfg.rl != "orpo": + if cfg.adapter and not cfg.rl_adapter_ref_model: + # use built-in trl autounwrap + LOG.debug("Passing model_ref: None to RL trainer") + model_ref = None # explicit setting to None + else: + # load the model again for model_ref/baseline + model_ref, _ = load_model( + cfg, tokenizer, inference=cli_args.inference, reference_model=True + ) + + safe_serialization = cfg.save_safetensors is True + + if cfg.unfrozen_parameters: + freeze_layers_except(model, cfg.unfrozen_parameters) + + trainer = setup_trainer( + cfg, + train_dataset, + eval_dataset, + (model, model_ref, peft_config), + tokenizer, + total_num_steps, + ) + + # go ahead and presave, so we have the adapter config available to inspect + if peft_config: + LOG.info(f"Pre-saving adapter config to {cfg.output_dir}") + peft_config.save_pretrained(cfg.output_dir) + # additionally presave the tokenizer and model configs + if not Path(cfg.output_dir).is_dir(): + os.makedirs(cfg.output_dir, exist_ok=True) + tokenizer.save_pretrained(str(Path(cfg.output_dir))) + if hasattr(model, "config"): + model.config.save_pretrained(str(Path(cfg.output_dir))) + + # In case we want to stop early with ctrl+c, this is a nice to have to save the pretrained model + if cfg.local_rank == 0: + + def terminate_handler(_, __, model): + if cfg.flash_optimum and BetterTransformer: + model = BetterTransformer.reverse(model) + model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization) + sys.exit(0) + + signal.signal( + signal.SIGINT, lambda signum, frame: terminate_handler(signum, frame, model) + ) + + badge_markdown = """[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl)""" + transformers.modelcard.AUTOGENERATED_TRAINER_COMMENT += f"\n{badge_markdown}" + + if getattr(cfg, "axolotl_config_path"): + raw_axolotl_cfg = Path(cfg.axolotl_config_path) + version = get_distribution("axolotl").version + if raw_axolotl_cfg.is_file(): + transformers.modelcard.AUTOGENERATED_TRAINER_COMMENT += f"\n
<details><summary>See axolotl config</summary>\n\naxolotl version: `{version}`\n```yaml\n{raw_axolotl_cfg.read_text(encoding='utf-8')}\n```\n\n</details><br>
\n" + + LOG.info("Starting trainer...") + if cfg.group_by_length: + LOG.info("hang tight... sorting dataset for group_by_length") + + pretrain_hooks(cfg, trainer) + if cfg.flash_optimum: + with torch.backends.cuda.sdp_kernel( + # TODO configure these from the YAML w/ sdp_kernel_kwargs: ... + enable_flash=True, + enable_math=True, + enable_mem_efficient=True, + ): + trainer.train(resume_from_checkpoint=resume_from_checkpoint) + else: + trainer.train(resume_from_checkpoint=resume_from_checkpoint) + post_train_hooks(cfg, trainer) + + LOG.info(f"Training Completed!!! Saving pre-trained model to {cfg.output_dir}") + + # post training + for name, module in model.named_modules(): + if hasattr(module, "_post_training"): + module._post_training(model, name) # pylint: disable=protected-access + + if trainer.is_fsdp_enabled: + trainer.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT") + LOG.info("Set FSDP state dict type to FULL_STATE_DICT for saving.") + + if cfg.relora_steps: + if cfg.adapter == "lora" and not (cfg.load_in_4bit or cfg.load_in_8bit): + model = model.merge_and_unload() + else: + # final model weights have already been saved by `ReLoRACallback.on_train_end` + return model, tokenizer + + # TODO do we need this fix? https://huggingface.co/docs/accelerate/usage_guides/fsdp#saving-and-loading + # only save on rank 0, otherwise it corrupts output on multi-GPU when multiple processes attempt to write the same file + if cfg.fsdp: + trainer.save_model(cfg.output_dir) + elif cfg.deepspeed and is_deepspeed_zero3_enabled(): + # Copied over from: https://github.com/huggingface/accelerate/blob/5ae611118057232f441055f7ef9ba0b0f2b8d533/docs/source/usage_guides/deepspeed.md#saving-and-loading + trainer.accelerator.wait_for_everyone() + unwrapped_model = trainer.accelerator.unwrap_model(trainer.model_wrapped) + + # Saves the whole/unpartitioned fp16 model when in ZeRO Stage-3 to the output directory if + # `stage3_gather_16bit_weights_on_model_save` is True in DeepSpeed Config file or + # `zero3_save_16bit_model` is True in DeepSpeed Plugin. + # For Zero Stages 1 and 2, models are saved as usual in the output directory. 
+ # The model name saved is `pytorch_model.bin` + unwrapped_model.save_pretrained( + cfg.output_dir, + is_main_process=trainer.accelerator.is_main_process, + save_function=trainer.accelerator.save, + state_dict=trainer.accelerator.get_state_dict(trainer.model_wrapped), + ) + elif cfg.local_rank == 0: + if cfg.flash_optimum and BetterTransformer: + model = BetterTransformer.reverse(model) + + model.save_pretrained(cfg.output_dir, safe_serialization=safe_serialization) + + if not cfg.hub_model_id: + try: + trainer.create_model_card(model_name=cfg.output_dir.lstrip("./")) + except AttributeError: + pass + elif cfg.hub_model_id: + # defensively push to the hub to ensure the model card is updated + trainer.push_to_hub() + + return model, tokenizer + + +def pretrain_hooks(_cfg, _trainer): + """ + Run hooks right before kicking off the training + :param cfg: + :param trainer: + :return: + """ + + +def post_train_hooks(_cfg, _trainer): + """ + Run hooks right after training completes + :param cfg: + :param trainer: + :return: + """ diff --git a/src/axolotl/utils/__init__.py b/src/axolotl/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/utils/bench.py b/src/axolotl/utils/bench.py new file mode 100644 index 0000000000000000000000000000000000000000..11c25160da872eac524c860e649a66fc1215ae4c --- /dev/null +++ b/src/axolotl/utils/bench.py @@ -0,0 +1,84 @@ +"""Benchmarking and measurement utilities""" +import functools + +import pynvml +import torch +from pynvml.nvml import NVMLError + + +def check_cuda_device(default_value): + """ + wraps a function and returns the default value instead of running the + wrapped function if cuda isn't available or the device is auto + :param default_value: + :return: + """ + + def deco(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + device = kwargs.get("device", args[0] if args else None) + + if ( + device is None + or not torch.cuda.is_available() + or device == "auto" + or torch.device(device).type == "cpu" + or torch.device(device).type == "meta" + ): + return default_value + return func(*args, **kwargs) + + return wrapper + + return deco + + +@check_cuda_device(0.0) +def gpu_memory_usage(device=0): + return torch.cuda.memory_allocated(device) / 1024.0**3 + + +@check_cuda_device((0.0, 0.0, 0.0)) +def gpu_memory_usage_all(device=0): + usage = torch.cuda.memory_allocated(device) / 1024.0**3 + reserved = torch.cuda.memory_reserved(device) / 1024.0**3 + smi = gpu_memory_usage_smi(device) + return usage, reserved - usage, max(0, smi - reserved) + + +def mps_memory_usage_all(): + usage = torch.mps.current_allocated_memory() / 1024.0**3 + reserved = torch.mps.driver_allocated_memory() / 1024.0**3 + return usage, reserved - usage, 0 + + +@check_cuda_device(0.0) +def gpu_memory_usage_smi(device=0): + if isinstance(device, torch.device): + device = device.index + if isinstance(device, str) and device.startswith("cuda:"): + device = int(device[5:]) + try: + pynvml.nvmlInit() + handle = pynvml.nvmlDeviceGetHandleByIndex(device) + info = pynvml.nvmlDeviceGetMemoryInfo(handle) + return info.used / 1024.0**3 + except NVMLError: + return 0.0 + + +def log_gpu_memory_usage(log, msg, device): + if torch.backends.mps.is_available(): + usage, cache, misc = mps_memory_usage_all() + else: + usage, cache, misc = gpu_memory_usage_all(device) + extras = [] + if cache > 0: + extras.append(f"+{cache:.03f}GB cache") + if misc > 0: + extras.append(f"+{misc:.03f}GB misc") + 
log.info( + f"GPU memory usage {msg}: {usage:.03f}GB ({', '.join(extras)})", stacklevel=2 + ) + return usage, cache, misc diff --git a/src/axolotl/utils/callbacks/__init__.py b/src/axolotl/utils/callbacks/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6a489f6c0e1003bb00a5ba8be1c5f5fdf6680aef --- /dev/null +++ b/src/axolotl/utils/callbacks/__init__.py @@ -0,0 +1,757 @@ +"""Callbacks for Trainer class""" + +from __future__ import annotations + +import logging +import os +from shutil import copyfile +from tempfile import NamedTemporaryFile +from typing import TYPE_CHECKING, Dict, List + +import evaluate +import numpy as np +import pandas as pd +import torch +import torch.distributed as dist +import wandb +from datasets import load_dataset +from optimum.bettertransformer import BetterTransformer +from tqdm import tqdm +from transformers import ( + GenerationConfig, + Trainer, + TrainerCallback, + TrainerControl, + TrainerState, + TrainingArguments, +) +from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR, IntervalStrategy + +from axolotl.utils.bench import log_gpu_memory_usage +from axolotl.utils.distributed import ( + barrier, + broadcast_dict, + gather_scalar_from_all_ranks, + get_world_size, + is_distributed, + is_main_process, + zero_first, +) + +if TYPE_CHECKING: + from axolotl.core.trainer_builder import AxolotlTrainingArguments + +IGNORE_INDEX = -100 +LOG = logging.getLogger("axolotl.callbacks") + + +class EvalFirstStepCallback( + TrainerCallback +): # pylint: disable=too-few-public-methods disable=unused-argument + """ + Callback to trigger evals on the first step + """ + + def on_step_end( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **kwargs, + ): + if ( + args.evaluation_strategy == IntervalStrategy.STEPS + and state.global_step == 1 + ): + control.should_evaluate = True + return control + + +class SaveBetterTransformerModelCallback( + TrainerCallback +): # pylint: disable=too-few-public-methods + """Callback to save the BetterTransformer wrapped model""" + + def on_step_end( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **kwargs, + ): + # Save + if ( + args.save_strategy == IntervalStrategy.STEPS + and args.save_steps > 0 + and state.global_step % args.save_steps == 0 + ): + control.should_save = True + + if control.should_save: + checkpoint_folder = os.path.join( + args.output_dir, + f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}", + ) + + model = BetterTransformer.reverse(kwargs["model"]) + model.save_pretrained(checkpoint_folder) + # FIXME - need to cleanup old checkpoints + + # since we're saving here, we don't need the trainer loop to attempt to save too b/c + # the trainer will raise an exception since it can't save a BetterTransformer wrapped model + control.should_save = False + return control + + +class GPUStatsCallback( + TrainerCallback +): # pylint: disable=too-few-public-methods disable=unused-argument + """Callback to track GPU utilization""" + + def __init__(self, cfg): + self.cfg = cfg + self.logged = False + + def on_step_end( + self, + args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **kwargs, + ): + if not self.logged and state.global_step > 1: + log_gpu_memory_usage(LOG, "while training", self.cfg.device) + self.logged = True + return control + + +class LossWatchDogCallback(TrainerCallback): + """Callback to track loss and stop training if loss is too high""" + + def __init__(self, cfg): + self.cfg = cfg + 
self.logged = False + self.violations = 0 + self.threshold = cfg.loss_watchdog_threshold + self.patience = cfg.loss_watchdog_patience or 3 + + def on_step_end( + self, + _args: TrainingArguments, + state: TrainerState, + control: TrainerControl, + **_kwargs, + ): + if len(state.log_history) > 0 and "loss" in state.log_history[-1]: + if state.log_history[-1]["loss"] > self.threshold: + self.violations += 1 + if self.violations >= self.patience: + LOG.warning( + "Loss is too high, stopping training (loss_watchdog_threshold)" + ) + control.should_training_stop = True + else: + self.violations = 0 + return control + + +def bench_eval_callback_factory(trainer, tokenizer): + accuracy = evaluate.load("accuracy") + abcd_idx = [ + tokenizer("A", add_special_tokens=False).input_ids[0], + tokenizer("B", add_special_tokens=False).input_ids[0], + tokenizer("C", add_special_tokens=False).input_ids[0], + tokenizer("D", add_special_tokens=False).input_ids[0], + tokenizer("E", add_special_tokens=False).input_ids[0], + tokenizer("F", add_special_tokens=False).input_ids[0], + tokenizer("G", add_special_tokens=False).input_ids[0], + ] + bench_split = "eval" + + def transform_bench_subject(example): + # Split on ':' and trim whitespace + parts = example["subject"].split(":") + first_part = ( + parts[0].strip().lower().replace("-", "_") + ) # Lowercase the first part + second_part = ( + parts[1].strip().replace("-", "_") if len(parts) > 1 else "all" + ) # Replace hyphens with underscores + + # Return the transformed values + return {"name": first_part, "subject": second_part} + + if trainer.args.bench_dataset == "mmlu-zs": + bench_dataset = load_dataset( + "openaccess-ai-collective/mmlu-evals", + data_files={ + "eval": "zero_shot_mmlu_val.json", + "test": "zero_shot_mmlu_test.json", + }, + ) + # bench_dataset = bench_dataset.remove_columns("subject") + # MMLU Five-shot (Eval/Test only) + elif trainer.args.bench_dataset in ["mmlu", "mmlu-fs"]: + bench_dataset = load_dataset( + "openaccess-ai-collective/mmlu-evals", + data_files={ + "eval": "five_shot_mmlu_val.json", + "test": "five_shot_mmlu_test.json", + }, + ) + # bench_dataset = bench_dataset.remove_columns('subject') + elif "/" in trainer.args.bench_dataset: + bench_ds = trainer.args.bench_dataset + bench_ds_name = "/".join(bench_ds.split("/", 2)[:2]) + bench_ds_data_file = "/".join(bench_ds.split("/", 2)[2:]) + bench_dataset = load_dataset( + bench_ds_name, + data_files={ + "eval": bench_ds_data_file, + }, + ) + bench_dataset["eval"] = bench_dataset["eval"].map(transform_bench_subject) + else: + raise ValueError( + f"unhandled value `{trainer.args.bench_dataset}` for bench_dataset training args" + ) + bench_dataset = bench_dataset[trainer.args.bench_split] + if trainer.args.max_bench_samples is not None: + bench_dataset = bench_dataset.select(range(trainer.args.max_bench_samples)) + + def tokenize_evals(example): + source = f"{tokenizer.bos_token}{example['input']}" + target = f"{example['output']}{tokenizer.eos_token}" + + tokenized_source = tokenizer( + source, + max_length=2048, + truncation=True, + add_special_tokens=False, + ) + tokenized_target = tokenizer( + target, + max_length=2048, + truncation=True, + add_special_tokens=False, + ) + input_ids = tokenized_source["input_ids"] + tokenized_target["input_ids"] + labels = [IGNORE_INDEX] * len(tokenized_source["input_ids"]) + tokenized_target[ + "input_ids" + ] + + return { + "input_ids": input_ids, + "labels": labels, + "subject": example["subject"], + } + + with zero_first(is_main_process()): + 
bench_dataset = bench_dataset.map(tokenize_evals) + bench_dataset = bench_dataset.filter(lambda x: x["labels"][-2] in abcd_idx) + + class BenchEvalCallback(TrainerCallback): + """ + TrainerCallback that runs the MMLU evals + """ + + def on_evaluate( + self, + args: AxolotlTrainingArguments, + state: TrainerState, # pylint: disable=unused-argument + control: TrainerControl, # pylint: disable=unused-argument + metrics: Dict[str, float], # pylint: disable=unused-argument + **kwargs, # pylint: disable=unused-argument + ): + data_loader = trainer.get_bench_dataloader( + bench_dataset.remove_columns(["input", "subject", "output", "name"]) + ) + trainer.model.eval() + preds, refs = [], [] + loss_bench = 0 + for batch in tqdm(data_loader, total=len(data_loader)): + (loss, logits, labels) = trainer.prediction_step( + trainer.model, + batch, + prediction_loss_only=False, + ) + # There are two tokens, the output, and eos token. + for i, logit in enumerate(logits): + label_non_zero_id = (batch["labels"][i] != IGNORE_INDEX).nonzero()[ + 0 + ][0] + logit_abcd = logit[label_non_zero_id - 1][abcd_idx] + preds.append(torch.argmax(logit_abcd).item()) + labels = labels[labels != IGNORE_INDEX].view(-1, 2)[:, 0] + refs += [ + abcd_idx.index(label) if label in abcd_idx else -1 + for label in labels.tolist() + ] + loss_bench += loss.item() + # Extract results by subject. + bench_name = bench_dataset["name"] + bench_names: dict = {s: {"refs": [], "preds": []} for s in set(bench_name)} + for s, p, r in zip(bench_name, preds, refs): # pylint: disable=invalid-name + bench_names[s]["preds"].append(p) + bench_names[s]["refs"].append(r) + barrier() + local_bench_names = bench_names + gathered_bench_names: List[Dict] = [{} for _ in range(get_world_size())] + # Gather results from all GPUs to GPU 0 + + loss_bench_ranks = gather_scalar_from_all_ranks( + lambda: loss_bench, get_world_size() + ) + len_data_loader_ranks = gather_scalar_from_all_ranks( + lambda: len(data_loader), get_world_size() + ) + + results = {} + if is_distributed() and not is_main_process(): + dist.gather_object(local_bench_names, dst=0) + else: + if is_distributed(): + dist.gather_object(local_bench_names, gathered_bench_names, dst=0) + else: + gathered_bench_names = [local_bench_names] + bench_loss = sum(loss_bench_ranks) / sum(len_data_loader_ranks) + results = {f"{bench_split}_bench_loss": bench_loss} + + # Combine results from all GPUs + combined_bench_names: Dict[str, Dict[str, List]] = {} + for bench_name in gathered_bench_names: + for name, data in bench_name.items(): + if name not in combined_bench_names: + combined_bench_names[name] = {"refs": [], "preds": []} + combined_bench_names[name]["refs"].extend(data["refs"]) + combined_bench_names[name]["preds"].extend(data["preds"]) + + bench_scores = [] + bench_refs = [] + bench_preds = [] + for ( + bench_name + ) in combined_bench_names: # pylint: disable=consider-using-dict-items + bench_score = accuracy.compute( + references=combined_bench_names[bench_name]["refs"], + predictions=combined_bench_names[bench_name]["preds"], + )["accuracy"] + bench_refs.extend(combined_bench_names[bench_name]["refs"]) + bench_preds.extend(combined_bench_names[bench_name]["preds"]) + if not pd.isna(bench_score): + results[ + f"{bench_split}_bench_accuracy_{bench_name}" + ] = bench_score + bench_scores.append(bench_score) + else: + results[f"{bench_split}_bench_accuracy_{bench_name}"] = 0.0 + bench_scores.append(0.0) + results[f"{bench_split}_bench_average_accuracy"] = np.mean(bench_scores) + 
results[f"{bench_split}_bench_total_accuracy"] = accuracy.compute( + references=bench_refs, predictions=bench_preds + )["accuracy"] + trainer.log(results) + + results = broadcast_dict(results) + for key, val in results.items(): + metrics[key] = val + + return BenchEvalCallback + + +def causal_lm_bench_eval_callback_factory(trainer: Trainer, tokenizer): + class CausalLMBenchEvalCallback(TrainerCallback): + """Callback to log prediction values during each evaluation""" + + def __init__(self, cfg): + self.cfg = cfg + self.logged = False + self.metrics = self.__maybe_load_metrics() + + def __maybe_load_metrics(self): + metrics = {} + for metric in self.cfg.eval_causal_lm_metrics: + try: + metrics[metric] = evaluate.load(metric) + except Exception as exc: # pylint: disable=broad-exception-caught + LOG.warning(f"{metric}: {exc.args}") + return metrics + + def on_evaluate( + self, + args: AxolotlTrainingArguments, # pylint: disable=unused-argument + state: TrainerState, + control: TrainerControl, + train_dataloader, # pylint: disable=unused-argument + eval_dataloader, + **kwargs, # pylint: disable=unused-argument + ): + trainer.model.eval() + device = torch.device(self.cfg.device) + + # pylint: disable=duplicate-code + generation_config = GenerationConfig( + max_new_tokens=self.cfg.eval_max_new_tokens, + bos_token_id=tokenizer.bos_token_id, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.pad_token_id, + do_sample=False, + use_cache=True, + return_dict_in_generate=True, + output_attentions=False, + output_hidden_states=False, + output_scores=False, + ) + + def find_ranges(lst): + ranges = [] + start = 0 + for i in range(1, len(lst)): + if lst[i] == 0: + ranges.append((start, i - 1)) + start = i + end = len(lst) - 1 + ranges.append((start, end)) + return ranges + + def compute(metric: evaluate.Metric, **kwargs): + # safely compute a metric and return the score if the format is correct + metric_score = None + try: + metric_score = metric.compute(**kwargs) + return ( + metric_score["score"] + if "score" in metric_score + else metric_score["mean_score"] + ) + except Exception: # pylint: disable=broad-exception-caught + LOG.debug( + f"Failed to compute metric {metric.name} with kwargs {kwargs.keys()}" + ) + return metric_score + + def evaluate_preds(sources, predictions, references): + scores = {} + + for metric_name, metric in self.metrics.items(): + score = compute( + metric, + references=references, + predictions=predictions, + sources=sources, + ) + score = score or compute( + metric, + references=[[r] for r in references], + predictions=predictions, + ) + scores[metric_name] = score + return scores + + def predict_with_generate(): + eval_src, eval_pred, eval_ref = [], [], [] + + for batch in tqdm(eval_dataloader): + batch_labels = batch["labels"].to(device) + batch_input_ids = batch["input_ids"].to(device) + + if "position_ids" in batch: + batch_pos_ids = batch["position_ids"].tolist() + else: + batch_pos_ids = [None] * len(batch["input_ids"]) + + prompt_token_ids_list = [] + completion_token_ids_list = [] + + for input_ids_all, labels_all, pos_ids in zip( + batch_input_ids, + batch_labels, + batch_pos_ids, + ): + if pos_ids is None: + pos_ranges = [(0, len(input_ids_all) - 1)] + else: + pos_ranges = find_ranges(pos_ids) + + for pos_range in pos_ranges: + start, end = pos_range + if start == end: + continue + + input_ids = input_ids_all[start : end + 1] + labels = labels_all[start : end + 1] + + tokens_without_loss = labels == IGNORE_INDEX + tokens_with_loss = labels != 
IGNORE_INDEX + tokens_exclude_padding = input_ids != tokenizer.pad_token_id + prompt_token_includes = ( + tokens_without_loss & tokens_exclude_padding + ) + + prompt_token_ids = input_ids[prompt_token_includes] + prompt_token_ids_list.append(prompt_token_ids) + + completion_token_ids = input_ids[tokens_with_loss] + completion_token_ids_list.append(completion_token_ids) + + prompt_texts = tokenizer.batch_decode( + prompt_token_ids_list, skip_special_tokens=True + ) + completion_texts = tokenizer.batch_decode( + completion_token_ids_list, skip_special_tokens=True + ) + + with torch.no_grad(): + prompt_encoding = tokenizer( + prompt_texts, padding=True, return_tensors="pt" + ).to(self.cfg.device) + predictions = trainer.model.generate( + **prompt_encoding, generation_config=generation_config + ) + + prediction_all_tokens = predictions["sequences"].cpu().tolist() + prediction_without_prompt_tokens_list = [] + for prompt_token_ids, prediction_tokens in zip( + prompt_token_ids_list, prediction_all_tokens + ): + prediction_without_prompt_tokens = prediction_tokens[ + len(prompt_token_ids) : + ] + prediction_without_prompt_tokens_list.append( + prediction_without_prompt_tokens + ) + + predicted_texts = tokenizer.batch_decode( + prediction_without_prompt_tokens_list, skip_special_tokens=True + ) + + eval_src.extend(prompt_texts) + eval_pred.extend(predicted_texts) + eval_ref.extend(completion_texts) + + return eval_src, eval_pred, eval_ref + + if is_main_process(): + eval_preds = predict_with_generate() + trainer.log(evaluate_preds(*eval_preds)) + + return control + + return CausalLMBenchEvalCallback + + +def log_prediction_callback_factory(trainer: Trainer, tokenizer): + class LogPredictionCallback(TrainerCallback): + """Callback to log prediction values during each evaluation""" + + def __init__(self, cfg): + self.cfg = cfg + self.logged = False + + def on_evaluate( + self, + args: AxolotlTrainingArguments, # pylint: disable=unused-argument + state: TrainerState, + control: TrainerControl, + train_dataloader, # pylint: disable=unused-argument + eval_dataloader, + **kwargs, # pylint: disable=unused-argument + ): + eval_table_size = self.cfg.eval_table_size + + if eval_table_size <= 0: + return control + + trainer.model.eval() + device = torch.device(self.cfg.device) + + # pylint: disable=duplicate-code + generation_config = GenerationConfig( + max_new_tokens=self.cfg.eval_max_new_tokens, + bos_token_id=tokenizer.bos_token_id, + eos_token_id=tokenizer.eos_token_id, + pad_token_id=tokenizer.pad_token_id, + do_sample=False, + use_cache=True, + return_dict_in_generate=True, + output_attentions=False, + output_hidden_states=False, + output_scores=False, + ) + + def logits_to_tokens(logits) -> torch.Tensor: + probabilities = torch.softmax(logits, dim=-1) + # Get the predicted token ids (the ones with the highest probability) + predicted_token_ids = torch.argmax(probabilities, dim=-1) + return predicted_token_ids + + def find_ranges(lst): + ranges = [] + start = 0 + for i in range(1, len(lst)): + if lst[i] == 0: + ranges.append((start, i - 1)) + start = i + end = len(lst) - 1 + ranges.append((start, end)) + return ranges + + def log_table_from_dataloader(name: str, table_dataloader): + table = wandb.Table( # type: ignore[attr-defined] + columns=[ + "id", + "Prompt", + "Correct Completion", + "Predicted Completion (model.generate)", + "Predicted Completion (trainer.prediction_step)", + ] + ) + row_index = 0 + + for batch in tqdm(table_dataloader): + if row_index > eval_table_size: + break + + batch_labels 
= batch["labels"].to(device) + batch_input_ids = batch["input_ids"].to(device) + + if "position_ids" in batch: + batch_pos_ids = batch["position_ids"].tolist() + else: + batch_pos_ids = [None] * len(batch["input_ids"]) + + (_, batch_logits, _) = trainer.prediction_step( + trainer.model, + batch, + prediction_loss_only=False, + ) + + prompt_token_ids_list = [] + pred_step_token_ids_list = [] + completion_token_ids_list = [] + + for input_ids_all, labels_all, pos_ids, logits in zip( + batch_input_ids, + batch_labels, + batch_pos_ids, + batch_logits, + ): + if pos_ids is None: + pos_ranges = [(0, len(input_ids_all) - 1)] + else: + pos_ranges = find_ranges(pos_ids) + + for pos_range in pos_ranges: + start, end = pos_range + if start == end: + continue + + input_ids = input_ids_all[start : end + 1] + labels = labels_all[start : end + 1] + + tokens_without_loss = labels == IGNORE_INDEX + tokens_with_loss = labels != IGNORE_INDEX + tokens_exclude_padding = input_ids != tokenizer.pad_token_id + prompt_token_includes = ( + tokens_without_loss & tokens_exclude_padding + ) + + prompt_token_ids = input_ids[prompt_token_includes] + prompt_token_ids_list.append(prompt_token_ids) + + completion_token_ids = input_ids[tokens_with_loss] + completion_token_ids_list.append(completion_token_ids) + + pred_step_token_ids = logits_to_tokens( + logits[start : end + 1] + )[tokens_with_loss] + pred_step_token_ids_list.append(pred_step_token_ids) + + prompt_texts = tokenizer.batch_decode( + prompt_token_ids_list, skip_special_tokens=True + ) + completion_texts = tokenizer.batch_decode( + completion_token_ids_list, skip_special_tokens=True + ) + pred_step_texts = tokenizer.batch_decode( + pred_step_token_ids_list, skip_special_tokens=True + ) + + with torch.no_grad(): + prompt_encoding = tokenizer( + prompt_texts, padding=True, return_tensors="pt" + ).to(self.cfg.device) + predictions = trainer.model.generate( + **prompt_encoding, generation_config=generation_config + ) + + prediction_all_tokens = predictions["sequences"].cpu().tolist() + prediction_without_prompt_tokens_list = [] + for prompt_token_ids, prediction_tokens in zip( + prompt_token_ids_list, prediction_all_tokens + ): + prediction_without_prompt_tokens = prediction_tokens[ + len(prompt_token_ids) : + ] + prediction_without_prompt_tokens_list.append( + prediction_without_prompt_tokens + ) + + predicted_texts = tokenizer.batch_decode( + prediction_without_prompt_tokens_list, skip_special_tokens=True + ) + + for ( + prompt_text, + completion_text, + prediction_text, + pred_step_text, + ) in zip( + prompt_texts, completion_texts, predicted_texts, pred_step_texts + ): + table.add_data( + row_index, + prompt_text, + completion_text, + prediction_text, + pred_step_text, + ) + row_index += 1 + + wandb.run.log({f"{name} - Predictions vs Ground Truth": table}) # type: ignore[attr-defined] + + if is_main_process(): + log_table_from_dataloader("Eval", eval_dataloader) + + return control + + return LogPredictionCallback + + +class SaveAxolotlConfigtoWandBCallback(TrainerCallback): + """Callback to save axolotl config to wandb""" + + def __init__(self, axolotl_config_path): + self.axolotl_config_path = axolotl_config_path + + def on_train_begin( + self, + args: AxolotlTrainingArguments, # pylint: disable=unused-argument + state: TrainerState, # pylint: disable=unused-argument + control: TrainerControl, + **kwargs, # pylint: disable=unused-argument + ): + if is_main_process(): + try: + # sync config to top level in run, cannot delete file right away because wandb 
schedules it to be synced even w/policy = 'now', so let OS delete it later. + with NamedTemporaryFile( + mode="w", delete=False, suffix=".yml", prefix="axolotl_config_" + ) as temp_file: + copyfile(self.axolotl_config_path, temp_file.name) + wandb.save(temp_file.name) + LOG.info( + "The Axolotl config has been saved to the WandB run under files." + ) + except (FileNotFoundError, ConnectionError) as err: + LOG.warning(f"Error while saving Axolotl config to WandB: {err}") + return control diff --git a/src/axolotl/utils/callbacks/mlflow_.py b/src/axolotl/utils/callbacks/mlflow_.py new file mode 100644 index 0000000000000000000000000000000000000000..fcbb88edcd4eae61fdeae19678a1edba9f0faa41 --- /dev/null +++ b/src/axolotl/utils/callbacks/mlflow_.py @@ -0,0 +1,44 @@ +"""MLFlow module for trainer callbacks""" +import logging +from shutil import copyfile +from tempfile import NamedTemporaryFile +from typing import TYPE_CHECKING + +import mlflow +from transformers import TrainerCallback, TrainerControl, TrainerState + +from axolotl.utils.distributed import is_main_process + +if TYPE_CHECKING: + from axolotl.core.trainer_builder import AxolotlTrainingArguments + +LOG = logging.getLogger("axolotl.callbacks") + + +class SaveAxolotlConfigtoMlflowCallback(TrainerCallback): + # pylint: disable=duplicate-code + """Callback to save axolotl config to mlflow""" + + def __init__(self, axolotl_config_path): + self.axolotl_config_path = axolotl_config_path + + def on_train_begin( + self, + args: "AxolotlTrainingArguments", # pylint: disable=unused-argument + state: TrainerState, # pylint: disable=unused-argument + control: TrainerControl, + **kwargs, # pylint: disable=unused-argument + ): + if is_main_process(): + try: + with NamedTemporaryFile( + mode="w", delete=False, suffix=".yml", prefix="axolotl_config_" + ) as temp_file: + copyfile(self.axolotl_config_path, temp_file.name) + mlflow.log_artifact(temp_file.name, artifact_path="") + LOG.info( + "The Axolotl config has been saved to the MLflow artifacts." + ) + except (FileNotFoundError, ConnectionError) as err: + LOG.warning(f"Error while saving Axolotl config to MLflow: {err}") + return control diff --git a/src/axolotl/utils/chat_templates.py b/src/axolotl/utils/chat_templates.py new file mode 100644 index 0000000000000000000000000000000000000000..fd34b4ea99164945a9301c93c07edcd508b50253 --- /dev/null +++ b/src/axolotl/utils/chat_templates.py @@ -0,0 +1,31 @@ +""" +This module provides functionality for selecting chat templates based on user choices. +These templates are used for formatting messages in a conversation. +""" + + +def chat_templates(user_choice: str): + """ + Finds the correct chat_template for the tokenizer_config. + + Args: + user_choice (str): The user's choice of template. + + Returns: + str: The chosen template string. + + Raises: + ValueError: If the user_choice is not found in the templates. 
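+
+    Example (illustrative sketch added by the editor; assumes a Hugging Face
+    tokenizer instance named ``tokenizer``):
+        >>> tokenizer.chat_template = chat_templates("chatml")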
+ """ + + templates = { + "alpaca": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### Instruction: ' + message['content'] + '\n\n' }}{% elif message['role'] == 'assistant' %}{{ '### Response: ' + message['content'] + eos_token}}{% endif %}{% endfor %}", + "inst": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", # I don't know what this one is called. Used by Mistral/Mixtral. + "chatml": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "gemma": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + } + + if user_choice in templates: + return templates[user_choice] + + raise ValueError(f"Template '{user_choice}' not found.") diff --git a/src/axolotl/utils/collators.py b/src/axolotl/utils/collators.py new file mode 100644 index 0000000000000000000000000000000000000000..8512b9408c8042717a591ac5d9cac5c7c3e6af65 --- /dev/null +++ b/src/axolotl/utils/collators.py @@ -0,0 +1,232 @@ +""" +DataCollator for axolotl to pad labels and position_ids for packed sequences +""" +from dataclasses import dataclass +from typing import Any, Dict, Optional, Sequence, Union + +import numpy as np +import torch +import transformers +from transformers import PreTrainedTokenizerBase +from transformers.utils import PaddingStrategy + +IGNORE_INDEX = -100 + + +@dataclass +class DataCollatorForSeq2Seq: + """ + Data collator that will dynamically pad the inputs received, as well as the labels and position_ids + + Args: + tokenizer ([`PreTrainedTokenizer`] or [`PreTrainedTokenizerFast`]): + The tokenizer used for encoding the data. + model ([`PreTrainedModel`]): + The model that is being trained. If set and has the *prepare_decoder_input_ids_from_labels*, use it to + prepare the *decoder_input_ids* + + This is useful when using *label_smoothing* to avoid calculating loss twice. + padding (`bool`, `str` or [`~utils.PaddingStrategy`], *optional*, defaults to `True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + + - `True` or `'longest'` (default): Pad to the longest sequence in the batch (or no padding if only a single + sequence is provided). + - `'max_length'`: Pad to a maximum length specified with the argument `max_length` or to the maximum + acceptable input length for the model if that argument is not provided. 
+ - `False` or `'do_not_pad'`: No padding (i.e., can output a batch with sequences of different lengths). + max_length (`int`, *optional*): + Maximum length of the returned list and optionally padding length (see above). + pad_to_multiple_of (`int`, *optional*): + If set will pad the sequence to a multiple of the provided value. + + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + label_pad_token_id (`int`, *optional*, defaults to -100): + The id to use when padding the labels (-100 will be automatically ignored by PyTorch loss functions). + return_tensors (`str`): + The type of Tensor to return. Allowable values are "np", "pt" and "tf". + """ + + tokenizer: PreTrainedTokenizerBase + model: Optional[Any] = None + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + label_pad_token_id: int = -100 + position_pad_token_id: int = 0 + return_tensors: str = "pt" + + def __call__(self, features, return_tensors=None): + labels = None + if return_tensors is None: + return_tensors = self.return_tensors + + for feature_name, pad_token_id in [ + ("labels", self.label_pad_token_id), + ("position_ids", self.position_pad_token_id), + ]: + feat = ( + [feature[feature_name] for feature in features] + if feature_name in features[0].keys() + else None + ) + labels = feat if feat and feature_name == "labels" else labels + # We have to pad the labels before calling `tokenizer.pad` as this method won't pad them and needs them of the + # same length to return tensors. + if feat is not None: + max_feature_length = max(len(l) for l in feat) # noqa: E741 + if self.pad_to_multiple_of is not None: + max_feature_length = ( + (max_feature_length + self.pad_to_multiple_of - 1) + // self.pad_to_multiple_of + * self.pad_to_multiple_of + ) + + padding_side = self.tokenizer.padding_side + for feature in features: + remainder = [pad_token_id] * ( + max_feature_length - len(feature[feature_name]) + ) + if isinstance(feature[feature_name], list): + feature[feature_name] = ( + feature[feature_name] + remainder + if padding_side == "right" + else remainder + feature[feature_name] + ) + elif padding_side == "right": + feature[feature_name] = np.concatenate( + [feature[feature_name], remainder] + ).astype(np.int64) + else: + feature[feature_name] = np.concatenate( + [remainder, feature[feature_name]] + ).astype(np.int64) + + features = self.tokenizer.pad( + features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + return_tensors=return_tensors, + ) + + # prepare decoder_input_ids + if ( + labels is not None + and self.model is not None + and hasattr(self.model, "prepare_decoder_input_ids_from_labels") + ): + decoder_input_ids = self.model.prepare_decoder_input_ids_from_labels( + labels=features["labels"] + ) + features["decoder_input_ids"] = decoder_input_ids + + return features + + +@dataclass +class BatchSamplerDataCollatorForSeq2Seq(DataCollatorForSeq2Seq): + """ + Collator for multipack specific to the using the BatchSampler + """ + + def __call__(self, features, return_tensors=None): + if not isinstance(features[0], list): + features = [features] + out_features = [{} for _ in features] + for i, features_ in enumerate(features): + for feature in features_[0].keys(): + if feature == "length": + continue + if feature == "attention_mask": + arrays = [ + (1) * np.array(item[feature]) + for i, item in enumerate(features_) + if 
feature in item + ] + out_features[i][feature] = np.concatenate(arrays) + else: + arrays = [ + np.array(item[feature]) for item in features_ if feature in item + ] + out_features[i][feature] = np.concatenate(arrays) + return super().__call__(out_features, return_tensors=return_tensors) + + +@dataclass +class V2BatchSamplerDataCollatorForSeq2Seq(DataCollatorForSeq2Seq): + """ + Collator for multipack specific to the using the BatchSampler + """ + + def __call__(self, features, return_tensors=None): + if not isinstance(features[0], list): + features = [features] + out_features = [{} for _ in features] + for i, features_ in enumerate(features): + for feature in features_[0].keys(): + if feature == "length": + continue + if feature == "attention_mask": + arrays = [ + (i + 1) * np.array(item[feature]) + for i, item in enumerate(features_) + if feature in item + ] + out_features[i][feature] = np.concatenate(arrays) + else: + arrays = [ + np.array(item[feature]) for item in features_ if feature in item + ] + out_features[i][feature] = np.concatenate(arrays) + return super().__call__(out_features, return_tensors=return_tensors) + + +@dataclass +class MambaDataCollator: + """ + Collator for State Space Models (Mamba) + """ + + tokenizer: transformers.PreTrainedTokenizer + + def __call__(self, instances: Sequence[Dict]) -> Dict[str, torch.Tensor]: + input_ids, labels = tuple( + [torch.LongTensor(instance[key]) for instance in instances] + for key in ("input_ids", "labels") + ) + input_ids = torch.nn.utils.rnn.pad_sequence( + input_ids, + batch_first=True, + padding_value=self.tokenizer.pad_token_id, + ) + labels = torch.nn.utils.rnn.pad_sequence( + labels, batch_first=True, padding_value=IGNORE_INDEX + ) + + return { + "input_ids": input_ids, + "labels": labels, + } + + +@dataclass +class PretrainingBatchSamplerDataCollatorForSeq2Seq(DataCollatorForSeq2Seq): + """ + Collator for multipack specific to the using the BatchSampler + """ + + def __call__(self, features, return_tensors=None): + chunked_data = {} + for feature in features.keys(): + if feature == "length": + continue + if feature == "attention_mask": + arrays = [(1) * np.array(item) for item in features[feature]] + chunked_data[feature] = np.concatenate(arrays) + else: + arrays = [np.array(item) for item in features[feature]] + chunked_data[feature] = np.concatenate(arrays) + features = [chunked_data] + return super().__call__(features, return_tensors=return_tensors) diff --git a/src/axolotl/utils/config/__init__.py b/src/axolotl/utils/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..3e743bda9f49b6e78e0e02fe71fc21d94b429ee9 --- /dev/null +++ b/src/axolotl/utils/config/__init__.py @@ -0,0 +1,602 @@ +"""Module for working with config dicts""" +import json +import logging +import os +from pathlib import Path +from typing import Optional + +import torch +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.utils.bench import log_gpu_memory_usage +from axolotl.utils.config.models.input.v0_4_1 import ( + AxolotlConfigWCapabilities, + AxolotlInputConfig, +) +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import load_model_config + +LOG = logging.getLogger("axolotl") + + +def choose_device(cfg): + def get_device(): + try: + if torch.cuda.is_available(): + return f"cuda:{cfg.local_rank}" + + if torch.backends.mps.is_available(): + return "mps" + + raise SystemError("No CUDA/mps device found") + except Exception: # pylint: disable=broad-exception-caught + return "cpu" 
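+
+    # Editor's illustrative note: on a single-GPU CUDA host this typically resolves to
+    # "cuda:0", Apple Silicon falls back to "mps", and "cpu" is the last resort.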
+ + cfg.device = get_device() + if cfg.world_size == 1: + cfg.device_map = cfg.device_map or "auto" + else: + if cfg.device.startswith("cuda"): + cfg.device_map = {"": torch.cuda.current_device()} + else: + cfg.device_map = {"": cfg.device} + + # in `accelerate launch`, we need to not pass through any device map and let + # accelerate figure out which parts of the model to put on which gpu + accelerate_vars = [var for var in os.environ if var.startswith("ACCELERATE_USE_")] + if accelerate_vars: + cfg.device_map = None + + +def normalize_config(cfg): + # setup some derived config / hyperparams + cfg.gradient_accumulation_steps = cfg.gradient_accumulation_steps or ( + cfg.batch_size // cfg.micro_batch_size + ) + cfg.batch_size = ( + cfg.batch_size or cfg.micro_batch_size * cfg.gradient_accumulation_steps + ) + if cfg.eval_batch_size is None: + cfg.eval_batch_size = cfg.micro_batch_size + cfg.world_size = int(os.environ.get("WORLD_SIZE", 1)) + cfg.local_rank = int(os.environ.get("LOCAL_RANK", 0)) + cfg.eval_table_size = cfg.eval_table_size or 0 + cfg.eval_max_new_tokens = cfg.eval_max_new_tokens or 128 + cfg.eval_causal_lm_metrics = cfg.eval_causal_lm_metrics or [ + "sacrebleu", + "comet", + "ter", + "chrf", + ] + choose_device(cfg) + cfg.ddp = cfg.ddp if cfg.ddp is not None else cfg.world_size != 1 + if cfg.ddp: + cfg.device_map = {"": int(os.environ.get("LOCAL_RANK", 0))} + cfg.batch_size = cfg.batch_size * cfg.world_size + + if cfg.bf16 == "auto": + if is_torch_bf16_gpu_available(): + LOG.debug("bf16 support detected, enabling for this configuration.") + cfg.bf16 = True + else: + LOG.debug("bf16 support not detected, disabling for this configuration.") + cfg.bf16 = False + if cfg.fp16 is None: + cfg.fp16 = True + + if cfg.device == "mps": + cfg.load_in_8bit = False + cfg.tf32 = False + if cfg.bf16: + cfg.fp16 = True + cfg.bf16 = False + else: + torch.backends.cuda.matmul.allow_tf32 = cfg.tf32 or False + if cfg.bf16: + cfg.fp16 = False + + if cfg.bf16 or cfg.bfloat16: + cfg.torch_dtype = torch.bfloat16 + elif cfg.load_in_8bit or cfg.fp16 or cfg.float16: + cfg.torch_dtype = torch.float16 + else: + cfg.torch_dtype = torch.float32 + + if cfg.saves_per_epoch: + save_steps = 1.0 / (cfg.saves_per_epoch * cfg.num_epochs) + if save_steps < 1.0: # prevent saves on every step + cfg.save_steps = save_steps + if (cfg.val_set_size or cfg.test_datasets) and cfg.evals_per_epoch: + eval_steps = 1.0 / (cfg.evals_per_epoch * cfg.num_epochs) + if eval_steps < 1.0: # prevent evals on every step + cfg.eval_steps = eval_steps + + cfg.dataset_processes = cfg.dataset_processes or os.cpu_count() + + if not cfg.base_model_config: + cfg.base_model_config = cfg.base_model + + model_config = load_model_config(cfg) + cfg.model_config_type = model_config.model_type + + # figure out if the model is llama + cfg.is_llama_derived_model = ( + (hasattr(model_config, "model_type") and model_config.model_type == "llama") + or cfg.is_llama_derived_model + or "llama" in cfg.base_model.lower() + or (cfg.type_of_model and "llama" in cfg.type_of_model.lower()) + ) + + # figure out if the model is falcon + cfg.is_falcon_derived_model = ( + ( + hasattr(model_config, "model_type") + and model_config.model_type + in [ + "falcon", + "RefinedWebModel", + "RefinedWeb", + ] + ) + or cfg.is_falcon_derived_model + or "falcon" in cfg.base_model.lower() + or (cfg.type_of_model and "rwforcausallm" in cfg.type_of_model.lower()) + ) + + cfg.is_mistral_derived_model = ( + ( + hasattr(model_config, "model_type") + and model_config.model_type + in [ + 
"mistral", + ] + ) + or cfg.is_mistral_derived_model + or "mistral" in cfg.base_model.lower().split("/")[-1] + or (cfg.type_of_model and "mistral" in cfg.type_of_model.lower()) + ) + + cfg.is_qwen_derived_model = ( + hasattr(model_config, "model_type") + and model_config.model_type + in [ + "qwen", + ] + ) or cfg.is_qwen_derived_model + + if isinstance(cfg.pretraining_dataset, dict): + cfg.pretraining_dataset = [cfg.pretraining_dataset] + + if ( + cfg.gradient_checkpointing + and cfg.unfrozen_parameters is None + and cfg.gradient_checkpointing_kwargs is None + and cfg.rl is None + ): + cfg.gradient_checkpointing_kwargs = {"use_reentrant": True} + + log_gpu_memory_usage(LOG, "baseline", cfg.device) + + +def normalize_cfg_datasets(cfg): + """ + helpers for mapping chat_template to various dataset configurations as necessary + """ + + if cfg.chat_template and cfg.chat_template == "chatml": + if cfg.datasets: + for idx, ds_cfg in enumerate(cfg.datasets): + if ds_cfg.type == "sharegpt" and not ds_cfg.conversation: + LOG.info( + f"updating dataset {ds_cfg.path} with `conversation: chatml` to match your chat_template" + ) + cfg.datasets[idx].conversation = "chatml" + if ds_cfg.type == "orpo.chat_template" and not ds_cfg.chat_template: + LOG.info( + f"updating dataset {ds_cfg.path} with `chat_template: chatml` to match your chat_template" + ) + cfg.datasets[idx].chat_template = "chatml" + + +def validate_config(cfg: DictDefault, capabilities: Optional[dict] = None): + if capabilities: + return DictDefault( + dict( + AxolotlConfigWCapabilities( + **cfg.to_dict(), capabilities=capabilities + ).model_dump(exclude_unset=True) + ) + ) + return DictDefault( + dict(AxolotlInputConfig(**cfg.to_dict()).model_dump(exclude_unset=True)) + ) + + +def legacy_validate_config(cfg): + """ + This is a "pre-validation" step that handles the yaml configuration before we have any + information about the model architecture + """ + if is_torch_bf16_gpu_available(): + if not cfg.bf16 and not cfg.bfloat16: + LOG.info("bf16 support detected, but not enabled for this configuration.") + else: + if ( + not cfg.merge_lora + and not cfg.is_preprocess + and (cfg.bf16 is True or cfg.bfloat16 is True) + ): + raise ValueError( + "bf16 requested, but AMP is not supported on this GPU. Requires Ampere series or above." + ) + if ( + # pylint: disable=too-many-boolean-expressions + not (cfg.bf16 or cfg.bfloat16) + and (cfg.fp16 or cfg.float16) + and not cfg.adapter + and not cfg.flash_attention + and cfg.sample_packing + ): + LOG.warning( + "Full fine tune w/o FA2 w/ sample packing and fp16/float16 is likely to raise errors. Try LoRA." + ) + # ValueError: Attempting to unscale FP16 gradients. + # OR + # RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half + if cfg.max_packed_sequence_len: + raise DeprecationWarning("`max_packed_sequence_len` is no longer supported") + + if cfg.sample_packing and cfg.rl: + raise ValueError("`sample_packing: true` does not work with RLHF training") + + if cfg.sample_packing and not cfg.pad_to_sequence_len: + LOG.warning( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + ) + + if cfg.gradient_accumulation_steps and cfg.batch_size: + raise ValueError( + "please set only one of gradient_accumulation_steps or batch_size" + ) + if cfg.batch_size: + LOG.warning( + "%s\n%s", + "batch_size is not recommended. 
Please use gradient_accumulation_steps instead.", + "To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.", + ) + if ( + cfg.eval_batch_size + and cfg.micro_batch_size + and cfg.eval_batch_size != cfg.micro_batch_size + ): + LOG.warning( + "eval_batch_size != micro_batch_size. This can lead to VRAM instability." + ) + + if cfg.adapter == "qlora": + if cfg.merge_lora: + # can't merge qlora if loaded in 8bit or 4bit + if cfg.load_in_8bit: + raise ValueError("Can't merge qlora if loaded in 8bit") + + if cfg.gptq: + raise ValueError("Can't merge qlora if gptq") + + if cfg.load_in_4bit: + raise ValueError("Can't merge qlora if loaded in 4bit") + + else: + if cfg.load_in_8bit: + raise ValueError("Can't load qlora in 8bit") + + if cfg.gptq: + raise ValueError("Can't load qlora if gptq") + + if not cfg.load_in_4bit: + raise ValueError("Require cfg.load_in_4bit to be True for qlora") + + if cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp: + raise ValueError("Fused modules are not supported with QLoRA") + + loftq = cfg.peft and cfg.peft.loftq_config and cfg.peft.loftq_config.loftq_bits + if not cfg.load_in_8bit and cfg.adapter == "lora" and not loftq: + LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning") + + if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp): + raise ValueError("Fused modules are not supported with LoRA") + + if cfg.adapter and cfg.peft_layers_to_transform and cfg.unfrozen_parameters: + raise ValueError( + "`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior." + ) + + if cfg.relora_steps: + if cfg.adapter not in ("lora", "qlora"): + raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA") + + if cfg.fsdp: + raise ValueError("fsdp not supported with ReLoRA") + + if cfg.deepspeed: + raise ValueError("deepspeed not supported with ReLoRA") + + if cfg.lr_scheduler == "one_cycle": + raise ValueError("ReLoRA is not compatible with the one_cycle scheduler") + + if cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp: + raise ValueError("Fused modules are not supported with ReLoRA") + + if cfg.trust_remote_code: + LOG.warning( + "`trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model." 
+ ) + + if cfg.push_dataset_to_hub and cfg.hf_use_auth_token is not True: + raise ValueError( + "Require cfg.hf_use_auth_token to be True for push_dataset_to_hub" + ) + + if (cfg.base_model and "falcon" in cfg.base_model.lower()) and cfg.fsdp: + raise ValueError("FSDP is not supported for falcon models") + + if ( + cfg.base_model and "mpt" in cfg.base_model.lower() + ) and cfg.gradient_checkpointing: + raise ValueError("gradient_checkpointing is not supported for MPT models") + + if cfg.flash_optimum is True: + if cfg.adapter: + LOG.warning("BetterTransformers probably doesn't work with PEFT adapters") + if cfg.fp16 or cfg.bf16: + raise ValueError("AMP is not supported with BetterTransformer") + if cfg.float16 is not True and cfg.bfloat16 is not True: + LOG.warning( + "You should probably set bfloat16 or float16 to true to " + "load the model in float16 for BetterTransformers" + ) + if int(torch.__version__.split(".", maxsplit=1)[0]) < 2: + LOG.warning("torch>=2.0.0 required") + raise ValueError( + f"flash_optimum for BetterTransformers may not be used with {torch.__version__}" + ) + + if cfg.pretraining_dataset and cfg.group_by_length: + LOG.warning( + "You probably want to disable group_by_length as it will force a streamed dataset to download completely." + ) + if cfg.pretraining_dataset and not cfg.max_steps: + raise ValueError( + "max_steps must be set when using iterable pretraining_dataset, Trainer can't infer length and schedule optimizer/learning rate without it!" + ) + + if any([cfg.adam_beta1, cfg.adam_beta2, cfg.adam_epsilon]) and ( + not cfg.optimizer or "adamw" not in cfg.optimizer + ): + LOG.warning("adamw hyperparameters found, but no adamw optimizer set") + + if cfg.push_to_hub_model_id: + raise ValueError( + "push_to_hub_model_id is deprecated. Please use hub_model_id instead." + ) + + if cfg.hub_model_id and not (cfg.save_steps or cfg.saves_per_epoch): + LOG.warning( + "hub_model_id is set without any models being saved. To save a model, set either save_steps or saves_per_epoch." + ) + + if cfg.gptq and cfg.revision_of_model: + raise ValueError( + "revision_of_model is not supported for GPTQ models. " + + "Please download the model from HuggingFace Hub manually for correct branch, " + + "point to its path, and remove revision_of_model from the config." + ) + + # if cfg.sample_packing and cfg.sdp_attention: + # # incompatible due to bug w/ accelerate causing 0.0 loss when using llama2 + # raise ValueError( + # "sample_packing not compatible with sdp_attention. Use flash_attention" + # ) + + if cfg.sample_packing and cfg.xformers_attention: + raise ValueError( + "sample_packing not compatible with xformers_attention. Use flash_attention" + ) + + if cfg.sample_packing and cfg.sdp_attention and (cfg.bfloat16 or cfg.bf16): + # https://github.com/pytorch/pytorch/blob/1b03423526536b5f3d35bdfa95ccc6197556cf9b/test/test_transformers.py#L2440-L2450 + LOG.warning( + "sample_packing & torch sdpa with bf16 is unsupported may results in 0.0 loss. " + "This may work on H100s." + ) + + if cfg.early_stopping_patience: + if not cfg.save_steps or not cfg.eval_steps: + raise ValueError( + "`early_stopping_patience` requires save_steps and eval_steps to be set. eval_steps should evenly divide save_steps." + ) + if cfg.save_steps % cfg.eval_steps != 0: + raise ValueError( + "`early_stopping_patience` requires that eval_steps should evenly divide save_steps." 
+ ) + + if cfg.datasets: + for idx, ds_cfg in enumerate(cfg.datasets): + if not ds_cfg.type: + continue + if ds_cfg.type == "sharegpt:chat": + LOG.warning( + PendingDeprecationWarning( + "`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead." + ) + ) + cfg.datasets[idx].type = "sharegpt" + if "sharegpt_simple" in ds_cfg.type: + LOG.warning( + PendingDeprecationWarning( + "`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead." + ) + ) + cfg.datasets[idx].type = cfg.datasets[idx].type.replace( + "sharegpt_simple", "sharegpt" + ) + + if cfg.saves_per_epoch and cfg.save_steps: + raise ValueError( + "save_steps and saves_per_epoch are mutually exclusive and cannot be used together." + ) + if cfg.saves_per_epoch and cfg.save_strategy and cfg.save_strategy != "steps": + raise ValueError( + "save_strategy must be empty or set to `steps` when used with saves_per_epoch." + ) + if cfg.evals_per_epoch and cfg.eval_steps: + raise ValueError( + "eval_steps and evals_per_epoch are mutually exclusive and cannot be used together." + ) + if ( + cfg.evals_per_epoch + and cfg.evaluation_strategy + and cfg.evaluation_strategy != "steps" + ): + raise ValueError( + "evaluation_strategy must be empty or set to `steps` when used with evals_per_epoch." + ) + if cfg.save_strategy and cfg.save_steps and cfg.save_strategy != "steps": + raise ValueError( + "save_strategy and save_steps mismatch. Please set save_strategy to 'steps' or remove save_steps." + ) + + if ( + cfg.evaluation_strategy + and cfg.eval_steps + and cfg.evaluation_strategy != "steps" + ): + raise ValueError( + "evaluation_strategy and eval_steps mismatch. Please set evaluation_strategy to 'steps' or remove eval_steps." + ) + + if ( + cfg.val_set_size == 0 + and (cfg.eval_steps or cfg.evaluation_strategy) + and not cfg.test_datasets + ): + raise ValueError( + "eval_steps and evaluation_strategy are not supported with val_set_size == 0" + ) + + if ( + cfg.sample_packing + and cfg.eval_table_size + and cfg.eval_sample_packing is not False + ): + raise ValueError( + "eval_table_size and eval_sample_packing are not supported together with sample_packing. Please set 'eval_sample_packing' to false." + ) + + if not cfg.adapter and (cfg.load_in_8bit or cfg.load_in_4bit): + raise ValueError( + "load_in_8bit and load_in_4bit are not supported without setting an adapter." + "If you want to full finetune, please turn off load_in_8bit and load_in_4bit." + ) + + if cfg.rope_scaling: + LOG.warning("`rope_scaling` should now be be a key under `model_config`") + + if cfg.wandb_run_id and not cfg.wandb_name: + cfg.wandb_name = cfg.wandb_run_id + + LOG.warning( + "wandb_run_id sets the ID of the run. If you would like to set the name, please use wandb_name instead." 
+ ) + + if cfg.noisy_embedding_alpha is not None: + # Deprecated, use neftune_noise_alpha + LOG.warning("noisy_embedding_alpha is deprecated, use neftune_noise_alpha") + if cfg.neftune_noise_alpha is None: + cfg.neftune_noise_alpha = cfg.noisy_embedding_alpha + else: + # User is providing both; bail and have them sort out their settings + raise ValueError( + "noisy_embedding_alpha is deprecated, use neftune_noise_alpha; both are set, please remove the deprecated noisy_embedding_alpha setting" + ) + + if cfg.neftune_noise_alpha is not None and cfg.neftune_noise_alpha <= 0.0: + raise ValueError("neftune_noise_alpha must be > 0.0") + + if cfg.max_memory is not None and cfg.gpu_memory_limit is not None: + raise ValueError( + "max_memory and gpu_memory_limit are mutually exclusive and cannot be used together." + ) + + if ( + cfg.unfrozen_parameters + and cfg.gradient_checkpointing_kwargs + and cfg.gradient_checkpointing_kwargs.use_reentrant is True + ): + # https://github.com/huggingface/transformers/issues/21381 + raise ValueError( + "`use_reentrant` must be false when used with partially frozen model." + ) + + if cfg.deepspeed and Path(cfg.deepspeed).is_file(): + with open(cfg.deepspeed, encoding="utf-8") as file: + contents = file.read() + deepspeed_cfg: DictDefault = DictDefault(json.loads(contents)) + if cfg.flash_attention: + if ( + deepspeed_cfg.zero_optimization + and deepspeed_cfg.zero_optimization.stage == 3 + ): + if not ( + ( + deepspeed_cfg.bf16 + and deepspeed_cfg.bf16.enabled # pylint: disable=no-member + is True + ) + or ( + deepspeed_cfg.fp16 + and deepspeed_cfg.fp16.enabled # pylint: disable=no-member + is True + ) + ): + raise ValueError( + "bf16.enabled or fp16.enabled must be set to true when using ZeRO-3 with flash-attention" + ) + if "8bit" in cfg.optimizer and deepspeed_cfg.optimizer: + LOG.warning( + f"conflicting optimizer: {cfg.optimizer} used alongside deepspeed optimizer." 
+ ) + + if cfg.test_datasets and cfg.val_set_size: + raise ValueError( + "non-zero val_set_size should not be used with test_datasets configuration" + ) + + if cfg.fsdp and "bnb" in cfg.optimizer: + raise ValueError(f"FSDP not compatible with {cfg.optimizer}") + + if cfg.do_causal_lm_eval and cfg.eval_sample_packing: + raise ValueError( + "do_causal_lm_eval is enabled, eval_sample_packing must be set to False" + ) + + if cfg.eval_causal_lm_metrics: + supported_metrics = ["sacrebleu", "comet", "ter", "chrf"] + if not isinstance(cfg.eval_causal_lm_metrics, list): + raise ValueError("eval_causal_lm_metrics must be a list") + # only ["sacrebleu", "comet", "ter", "chrf"] supported + if set(cfg.eval_causal_lm_metrics) - set(supported_metrics): + raise ValueError( + f"eval_causal_lm_metrics must be one of {supported_metrics}" + ) + + # TODO + # MPT 7b + # https://github.com/facebookresearch/bitsandbytes/issues/25 + # no 8bit adaAmw w bf16 + + # GPT-NeoX + # evals broken when extending context len + # File "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/transformers/models/gpt_neox/modeling_gpt_neox.py", line 162, in forward attn_output, attn_weights = self._attn(query, key, value, attention_mask, head_mask) + # File "/root/miniconda3/envs/py3.9/lib/python3.9/site-packages/optimum/bettertransformer/models/attention.py", line 74, in gpt2_wrapped_scaled_dot_product + # attention_mask = causal_mask + attention_mask + # RuntimeError: The size of tensor a (2048) must match the size of tensor b (8132) at non-singleton dimension 3 diff --git a/src/axolotl/utils/config/models/__init__.py b/src/axolotl/utils/config/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/utils/config/models/input/__init__.py b/src/axolotl/utils/config/models/input/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/utils/config/models/input/next/__init__.py b/src/axolotl/utils/config/models/input/next/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..272debce30413e9873ec702989012e530a676158 --- /dev/null +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -0,0 +1,1021 @@ +""" +Module for pydantic models for configuration +""" +# pylint: disable=too-many-lines + +import logging +import os +from enum import Enum +from typing import Any, Dict, List, Literal, Optional, Union + +from pydantic import BaseModel, Field, conlist, field_validator, model_validator +from transformers import SchedulerType +from transformers.training_args import OptimizerNames + +from axolotl.utils.config.models.internals import GPUCapabilities + +LOG = logging.getLogger("axolotl.utils.config.models.input") + + +class DeprecatedParameters(BaseModel): + """configurations that are deprecated""" + + max_packed_sequence_len: Optional[int] = None + rope_scaling: Optional[Any] = None + noisy_embedding_alpha: Optional[float] = None + + @field_validator("max_packed_sequence_len") + @classmethod + def validate_max_packed_sequence_len(cls, max_packed_sequence_len): + if max_packed_sequence_len: + raise DeprecationWarning("`max_packed_sequence_len` is no longer supported") + return 
max_packed_sequence_len + + @field_validator("rope_scaling") + @classmethod + def validate_rope_scaling(cls, rope_scaling): + if rope_scaling: + raise DeprecationWarning( + "`rope_scaling` is no longer supported, it should now be be a key under `model_config`" + ) + return rope_scaling + + @field_validator("noisy_embedding_alpha") + @classmethod + def validate_noisy_embedding_alpha(cls, noisy_embedding_alpha): + if noisy_embedding_alpha: + LOG.warning("noisy_embedding_alpha is deprecated, use neftune_noise_alpha") + return noisy_embedding_alpha + + +class RemappedParameters(BaseModel): + """parameters that have been remapped to other names""" + + overrides_of_model_config: Optional[Dict[str, Any]] = Field( + default=None, alias="model_config" + ) + type_of_model: Optional[str] = Field(default=None, alias="model_type") + revision_of_model: Optional[str] = Field(default=None, alias="model_revision") + + +class PretrainingDataset(BaseModel): + """pretraining dataset configuration subset""" + + path: Optional[str] = None + + +class UserDefinedPrompterType(BaseModel): + """structure for user defined prompt types""" + + system_prompt: Optional[str] = None + system_format: Optional[str] = None + field_system: Optional[str] = None + field_instruction: Optional[str] = None + field_input: Optional[str] = None + field_output: Optional[str] = None + + format: Optional[str] = None + no_input_format: Optional[str] = None + field: Optional[str] = None + + +class SFTDataset(BaseModel): + """SFT configuration subset""" + + path: Optional[str] = None + split: Optional[str] = None + type: Optional[Union[str, UserDefinedPrompterType]] = None + shards: Optional[int] = None + conversation: Optional[str] = None + chat_template: Optional[str] = None + data_files: Optional[Union[str, List[str]]] = None + name: Optional[str] = None + ds_type: Optional[str] = None + train_on_split: Optional[str] = None + + field_human: Optional[str] = None + field_model: Optional[str] = None + + roles: Optional[Dict[str, List[str]]] = None + + +class UserDefinedDPOType(BaseModel): + """User defined typing for DPO""" + + field_system: Optional[str] = None + field_prompt: Optional[str] = None + field_chosen: Optional[str] = None + field_rejected: Optional[str] = None + prompt_format: Optional[str] = None + chosen_format: Optional[str] = None + rejected_format: Optional[str] = None + + +class DPODataset(BaseModel): + """DPO configuration subset""" + + path: Optional[str] = None + split: Optional[str] = None + type: Optional[Union[UserDefinedDPOType, str]] = None + data_files: Optional[List[str]] = None + + +class RLType(str, Enum): + """RL trainer type configuration subset""" + + dpo = "dpo" # pylint: disable=invalid-name + ipo = "ipo" # pylint: disable=invalid-name + kto_pair = "kto_pair" # pylint: disable=invalid-name + orpo = "orpo" # pylint: disable=invalid-name + + +class ChatTemplate(str, Enum): + """Chat templates configuration subset""" + + alpaca = "alpaca" # pylint: disable=invalid-name + chatml = "chatml" # pylint: disable=invalid-name + inst = "inst" # pylint: disable=invalid-name + gemma = "gemma" # pylint: disable=invalid-name + + +class LoftQConfig(BaseModel): + """LoftQ configuration subset""" + + loftq_bits: int = Field(default=4, metadata={"help": "Quantization bits for LoftQ"}) + # loftq_iter: int = Field(default=1, metadata={"help": "Alternating iterations for LoftQ"}) + + +class PeftConfig(BaseModel): + """peftq configuration subset""" + + loftq_config: Optional[LoftQConfig] = None + + +class AutoType(str, Enum): + 
"""auto type string configuration subset - used for bf16""" + + AUTO = "auto" + + +class SpecialTokensConfig(BaseModel): + """Special tokens configuration subset""" + + bos_token: Optional[str] = None + eos_token: Optional[str] = None + pad_token: Optional[str] = None + unk_token: Optional[str] = None + additional_special_tokens: Optional[List[str]] = None + + +class LoraConfig(BaseModel): + """Peft / LoRA configuration subset""" + + load_in_8bit: Optional[bool] = Field(default=False) + load_in_4bit: Optional[bool] = Field(default=False) + + adapter: Optional[str] = None + lora_model_dir: Optional[str] = None + lora_r: Optional[int] = None + lora_alpha: Optional[int] = None + lora_fan_in_fan_out: Optional[bool] = None + lora_target_modules: Optional[List[str]] = None + lora_target_linear: Optional[bool] = None + lora_modules_to_save: Optional[List[str]] = None + lora_dropout: Optional[float] = None + peft_layers_to_transform: Optional[List[int]] = None + peft: Optional[PeftConfig] = None + peft_use_dora: Optional[bool] = None + peft_use_relora: Optional[bool] = None + + lora_on_cpu: Optional[bool] = None + gptq: Optional[bool] = None + bnb_config_kwargs: Optional[Dict[str, Any]] = None + + loraplus_lr_ratio: Optional[float] = Field( + default=None, + metadata={ + "help": "loraplus learning rate ratio lr_B / lr_A. Recommended value is 2^4." + }, + ) + loraplus_lr_embedding: Optional[float] = Field( + default=1e-6, + metadata={"help": "loraplus learning rate for lora embedding layers."}, + ) + + merge_lora: Optional[bool] = None + + @model_validator(mode="before") + @classmethod + def validate_adapter(cls, data): + if not data.get("adapter") and ( + data.get("load_in_8bit") or data.get("load_in_4bit") + ): + raise ValueError( + "load_in_8bit and load_in_4bit are not supported without setting an adapter." + "If you want to full finetune, please turn off load_in_8bit and load_in_4bit." + ) + return data + + @model_validator(mode="after") + def validate_qlora(self): + if self.adapter == "qlora": + if self.merge_lora: + # can't merge qlora if loaded in 8bit or 4bit + if self.load_in_8bit: + raise ValueError("Can't merge qlora if loaded in 8bit") + + if self.gptq: + raise ValueError("Can't merge qlora if gptq") + + if self.load_in_4bit: + raise ValueError("Can't merge qlora if loaded in 4bit") + + else: + if self.load_in_8bit: + raise ValueError("Can't load qlora in 8bit") + + if self.gptq: + raise ValueError("Can't load qlora if gptq") + + if not self.load_in_4bit: + raise ValueError("Require cfg.load_in_4bit to be True for qlora") + return self + + @model_validator(mode="before") + @classmethod + def validate_quantized_dora(cls, data): + if data.get("peft_use_dora") and ( + data.get("load_in_8bit") or data.get("load_in_4bit") + ): + raise ValueError( + "`peft_use_dora` is not currently compatible with quantized weights." 
+ ) + return data + + +class ReLoRAConfig(BaseModel): + """ReLoRA configuration subset""" + + relora_steps: Optional[int] = None + relora_warmup_steps: Optional[int] = None + relora_anneal_steps: Optional[int] = None + relora_prune_ratio: Optional[float] = None + relora_cpu_offload: Optional[bool] = None + + +class ModelInputConfig(BaseModel): + """model to train on configuration subset""" + + base_model: str + base_model_config: Optional[str] = None + tokenizer_config: Optional[str] = None + tokenizer_use_fast: Optional[bool] = None + tokenizer_legacy: Optional[bool] = None + tokenizer_type: Optional[str] = Field( + default=None, metadata={"help": "transformers tokenizer class"} + ) + trust_remote_code: Optional[bool] = None + + @field_validator("trust_remote_code") + @classmethod + def hint_trust_remote_code(cls, trust_remote_code): + if trust_remote_code: + LOG.warning( + "`trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model." + ) + return trust_remote_code + + +class HyperparametersConfig(BaseModel): + """training hyperparams configuration subset""" + + gradient_accumulation_steps: Optional[int] = Field(default=1) + micro_batch_size: Optional[int] = Field( + default=1, + metadata={"help": "per gpu micro batch size for training"}, + ) + batch_size: Optional[int] = Field( + default=None, + metadata={ + "help": "Total batch size, we do not recommended setting this manually" + }, + ) + eval_batch_size: Optional[int] = Field( + default=None, + metadata={ + "help": "per gpu micro batch size for evals, defaults to value of micro_batch_size" + }, + ) + + train_on_inputs: Optional[bool] = None + group_by_length: Optional[bool] = None + + learning_rate: Union[str, float] + weight_decay: Optional[float] = None + optimizer: Optional[Union[OptimizerNames, Literal["lion_pytorch"]]] = None + optim_args: Optional[Union[str, Dict[str, Any]]] = Field( + default=None, metadata={"help": "Optional arguments to supply to optimizer."} + ) + optim_target_modules: Optional[Union[List[str], Literal["all_linear"]]] = Field( + default=None, + metadata={ + "help": "The target modules to optimize, i.e. the module names that you would like to train." + }, + ) + torchdistx_path: Optional[str] = None + lr_scheduler: Optional[SchedulerType] = None + lr_scheduler_kwargs: Optional[Dict[str, Any]] = None + lr_quadratic_warmup: Optional[bool] = None + cosine_min_lr_ratio: Optional[float] = None + cosine_constant_lr_ratio: Optional[float] = None + lr_div_factor: Optional[float] = None + + adam_epsilon: Optional[float] = None + adam_beta1: Optional[float] = None + adam_beta2: Optional[float] = None + max_grad_norm: Optional[float] = None + num_epochs: int = Field(default=1) + + @field_validator("batch_size") + @classmethod + def hint_batch_size_set(cls, batch_size): + if batch_size: + LOG.warning( + "%s\n%s", + "batch_size is not recommended. 
Please use gradient_accumulation_steps instead.", + "To calculate the equivalent gradient_accumulation_steps, divide batch_size / micro_batch_size / number of gpus.", + ) + return batch_size + + @field_validator("learning_rate") + @classmethod + def convert_learning_rate(cls, learning_rate): + if learning_rate and isinstance(learning_rate, str): + learning_rate = float(learning_rate) + return learning_rate + + +class ModelOutputConfig(BaseModel): + """model save configuration subset""" + + output_dir: str = Field(default="./model-out") + hub_model_id: Optional[str] = None + hub_strategy: Optional[str] = None + save_safetensors: Optional[bool] = None + + +class MLFlowConfig(BaseModel): + """mlflow configuration subset""" + + use_mlflow: Optional[bool] = None + mlflow_tracking_uri: Optional[str] = None + mlflow_experiment_name: Optional[str] = None + hf_mlflow_log_artifacts: Optional[bool] = None + + +class WandbConfig(BaseModel): + """wandb configuration subset""" + + use_wandb: Optional[bool] = None + wandb_name: Optional[str] = None + wandb_run_id: Optional[str] = None + wandb_mode: Optional[str] = None + wandb_project: Optional[str] = None + wandb_entity: Optional[str] = None + wandb_watch: Optional[str] = None + wandb_log_model: Optional[str] = None + + @model_validator(mode="before") + @classmethod + def check_wandb_run(cls, data): + if data.get("wandb_run_id") and not data.get("wandb_name"): + data["wandb_name"] = data.get("wandb_run_id") + + LOG.warning( + "wandb_run_id sets the ID of the run. If you would like to set the name, please use wandb_name instead." + ) + + return data + + +# pylint: disable=too-many-public-methods,too-many-ancestors +class AxolotlInputConfig( + ModelInputConfig, + ModelOutputConfig, + LoraConfig, + ReLoRAConfig, + HyperparametersConfig, + WandbConfig, + MLFlowConfig, + RemappedParameters, + DeprecatedParameters, + BaseModel, +): + """wrapper of all config options""" + + class Config: + """Config for alias""" + + populate_by_name = True + + strict: Optional[bool] = Field(default=False) + resume_from_checkpoint: Optional[str] = None + auto_resume_from_checkpoints: Optional[bool] = None + resize_token_embeddings_to_32x: Optional[bool] = None + + rl: Optional[RLType] = None + + datasets: Optional[conlist(Union[SFTDataset, DPODataset], min_length=1)] = None # type: ignore + test_datasets: Optional[conlist(Union[SFTDataset, DPODataset], min_length=1)] = None # type: ignore + shuffle_merged_datasets: Optional[bool] = True + dataset_prepared_path: Optional[str] = None + dataset_shard_num: Optional[int] = None + dataset_shard_idx: Optional[int] = None + + pretraining_dataset: Optional[ # type: ignore + conlist(Union[SFTDataset, PretrainingDataset], min_length=1) + ] = Field( + default=None, metadata={"help": {"streaming dataset to use for pretraining"}} + ) + dataset_processes: Optional[int] = Field(default=os.cpu_count()) + dataset_keep_in_memory: Optional[bool] = None + dataloader_pin_memory: Optional[bool] = None + dataloader_num_workers: Optional[int] = None + dataloader_prefetch_factor: Optional[int] = None + dataloader_drop_last: Optional[bool] = None + + remove_unused_columns: Optional[bool] = None + + push_dataset_to_hub: Optional[str] = None + hf_use_auth_token: Optional[bool] = None + + device: Optional[Any] = None + device_map: Optional[Any] = None + world_size: Optional[int] = None + local_rank: Optional[int] = None + ddp: Optional[bool] = None + + seed: Optional[int] = None + ddp_timeout: Optional[int] = None + ddp_bucket_cap_mb: Optional[int] = None + 
ddp_broadcast_buffers: Optional[bool] = None + ddp_find_unused_parameters: Optional[bool] = None + + eval_table_size: Optional[int] = None + eval_max_new_tokens: Optional[int] = None + do_causal_lm_eval: Optional[bool] = None + eval_causal_lm_metrics: Optional[List[str]] = None + do_bench_eval: Optional[bool] = None + bench_dataset: Optional[str] = None + metric_for_best_model: Optional[str] = None + greater_is_better: Optional[bool] = None + + loss_watchdog_threshold: Optional[float] = None + loss_watchdog_patience: Optional[int] = None + + bf16: Optional[Union[AutoType, bool]] = AutoType.AUTO + fp16: Optional[bool] = None + bfloat16: Optional[bool] = None # for non-AMP cases + float16: Optional[bool] = None # for non-AMP cases + tf32: Optional[bool] = None + float32: Optional[bool] = None + + # torch_dtype: Optional[torch.dtype] + + gradient_checkpointing: Optional[bool] = Field(default=False) + gradient_checkpointing_kwargs: Optional[Dict[str, Any]] = None + + unfrozen_parameters: Optional[List[str]] = None + + sequence_len: int = Field(default=1024) + sample_packing: Optional[bool] = None + eval_sample_packing: Optional[bool] = None + pad_to_sequence_len: Optional[bool] = None + + xformers_attention: Optional[bool] = None + sdp_attention: Optional[bool] = None + s2_attention: Optional[bool] = None + flash_attention: Optional[bool] = None + flash_attn_cross_entropy: Optional[bool] = None + flash_attn_rms_norm: Optional[bool] = None + flash_attn_fuse_qkv: Optional[bool] = None + flash_attn_fuse_mlp: Optional[bool] = None + flash_optimum: Optional[bool] = None + + deepspeed: Optional[Union[str, Dict[str, Any]]] = None + fsdp: Optional[List[str]] = None + fsdp_config: Optional[Dict[str, Any]] = None + + val_set_size: Optional[float] = Field(default=0.0) + + special_tokens: Optional[SpecialTokensConfig] = None + tokens: Optional[List[str]] = None + + torch_compile: Optional[bool] = None + torch_compile_backend: Optional[str] = None + + max_steps: Optional[int] = None + warmup_steps: Optional[int] = None + warmup_ratio: Optional[float] = None + eval_steps: Optional[Union[int, float]] = None + evals_per_epoch: Optional[Union[int]] = None + evaluation_strategy: Optional[str] = None + save_steps: Optional[Union[int, float]] = None + saves_per_epoch: Optional[int] = None + save_strategy: Optional[str] = None + save_total_limit: Optional[int] = None + logging_steps: Optional[int] = None + early_stopping_patience: Optional[int] = None + load_best_model_at_end: Optional[bool] = False + + neftune_noise_alpha: Optional[float] = None + + orpo_alpha: Optional[float] = None + + max_memory: Optional[ + Dict[Union[int, Literal["cpu", "disk"]], Union[int, str]] + ] = None + gpu_memory_limit: Optional[Union[int, str]] = None + + chat_template: Optional[ChatTemplate] = None + default_system_message: Optional[str] = None + + # INTERNALS - document for now, generally not set externally + is_preprocess: Optional[bool] = None + + total_num_tokens: Optional[int] = None + total_supervised_tokens: Optional[int] = None + sample_packing_eff_est: Optional[float] = None + axolotl_config_path: Optional[str] = None + + is_falcon_derived_model: Optional[bool] = Field(default=False) + is_llama_derived_model: Optional[bool] = Field(default=False) + is_mistral_derived_model: Optional[bool] = Field(default=False) + is_qwen_derived_model: Optional[bool] = Field(default=False) + + @field_validator("datasets", mode="before") + @classmethod + def fix_sharegpt_datasets(cls, datasets): + for idx, ds_cfg in enumerate(datasets): + if 
not ds_cfg["type"]: + continue + if ds_cfg["type"] == "sharegpt:chat": + LOG.warning( + PendingDeprecationWarning( + "`type: sharegpt:chat` will soon be deprecated. simply use `type: sharegpt` instead." + ) + ) + datasets[idx]["type"] = "sharegpt" + if "sharegpt_simple" in ds_cfg["type"]: + LOG.warning( + PendingDeprecationWarning( + "`type: sharegpt_simple` will soon be deprecated. simply use `type: sharegpt` instead." + ) + ) + datasets[idx]["type"] = datasets[idx]["type"].replace( + "sharegpt_simple", "sharegpt" + ) + return datasets + + @model_validator(mode="before") + @classmethod + def check_batch_size_fields(cls, data): + fields = ("micro_batch_size", "gradient_accumulation_steps", "batch_size") + non_empty_count = sum(1 for field in fields if data.get(field)) + + if non_empty_count < 2: + raise ValueError(f"At least two of {', '.join(fields)} must be set") + return data + + @model_validator(mode="before") + @classmethod + def check_pretraining_w_max_steps(cls, data): + if data.get("pretraining_dataset") and not data.get("max_steps"): + raise ValueError( + "max_steps must be set when using iterable pretraining_dataset, Trainer can't infer length and schedule optimizer/learning rate without it!" + ) + return data + + @model_validator(mode="before") + @classmethod + def check_pretraining_w_group_by_length(cls, data): + if data.get("pretraining_dataset") and data.get("group_by_length"): + LOG.warning( + "You probably want to disable group_by_length as it will force a streamed dataset to download completely." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_gptq_w_revision(cls, data): + if data.get("gptq") and data.get("revision_of_model"): + raise ValueError( + "revision_of_model is not supported for GPTQ models. " + + "Please download the model from HuggingFace Hub manually for correct branch, " + + "point to its path, and remove revision_of_model from the config." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_sample_packing_w_xformers(cls, data): + if data.get("sample_packing") and data.get("xformers_attention"): + raise ValueError( + "sample_packing not compatible with xformers_attention. Use flash_attention" + ) + + return data + + @model_validator(mode="before") + @classmethod + def check_sample_packing_w_rl(cls, data): + if data.get("sample_packing") and data.get("rl"): + raise ValueError("`sample_packing: true` does not work with RLHF training") + return data + + @model_validator(mode="before") + @classmethod + def hint_sample_packing_padding(cls, data): + if data.get("sample_packing") and not data.get("pad_to_sequence_len"): + LOG.warning( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + ) + return data + + @model_validator(mode="before") + @classmethod + def check_gas_bsz(cls, data): + if data.get("gradient_accumulation_steps") and data.get("batch_size"): + raise ValueError( + "please set only one of gradient_accumulation_steps or batch_size" + ) + return data + + @model_validator(mode="before") + @classmethod + def hint_eval_train_mbsz(cls, data): + if ( + data.get("eval_batch_size") + and data.get("micro_batch_size") + and data.get("eval_batch_size") != data.get("micro_batch_size") + ): + LOG.warning( + "eval_batch_size != micro_batch_size. This can lead to VRAM instability." 
+ ) + return data + + @model_validator(mode="before") + @classmethod + def check_push_ds_auth(cls, data): + if ( + data.get("push_dataset_to_hub") + and data.get("hf_use_auth_token") is not True + ): + raise ValueError( + "Require cfg.hf_use_auth_token to be True for push_dataset_to_hub" + ) + return data + + @model_validator(mode="after") + def check_falcon_fsdp(self): + if (self.base_model and "falcon" in self.base_model.lower()) and self.fsdp: + raise ValueError("FSDP is not supported for falcon models") + return self + + @model_validator(mode="after") + def check_mpt_checkpointing(self): + if ( + self.base_model and "mpt" in self.base_model.lower() + ) and self.gradient_checkpointing: + raise ValueError("gradient_checkpointing is not supported for MPT models") + return self + + @model_validator(mode="after") + def check_better_transformers(self): + if self.flash_optimum is True: + if self.adapter: + LOG.warning( + "BetterTransformers probably doesn't work with PEFT adapters" + ) + if self.fp16 or self.bf16: + raise ValueError("AMP is not supported with BetterTransformer") + if self.float16 is not True and self.bfloat16 is not True: + LOG.warning( + "You should probably set bfloat16 or float16 to true to " + "load the model in float16 for BetterTransformers" + ) + return self + + @model_validator(mode="after") + def check_adamw_optimizer_params(self): + if any([self.adam_beta1, self.adam_beta2, self.adam_epsilon]) and ( + not self.optimizer or "adamw" not in self.optimizer.value + ): + LOG.warning("adamw hyperparameters found, but no adamw optimizer set") + return self + + @model_validator(mode="before") + @classmethod + def check_saves(cls, data): + if ( + data.get("save_strategy") + and data.get("save_steps") + and data.get("save_strategy") != "steps" + ): + raise ValueError( + "save_strategy and save_steps mismatch. Please set save_strategy to 'steps' or remove save_steps." + ) + if data.get("saves_per_epoch") and data.get("save_steps"): + raise ValueError( + "save_steps and saves_per_epoch are mutually exclusive and cannot be used together." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_push_save(cls, data): + if data.get("hub_model_id") and not ( + data.get("save_steps") or data.get("saves_per_epoch") + ): + LOG.warning( + "hub_model_id is set without any models being saved. To save a model, set either save_steps or saves_per_epoch." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_evals(cls, data): + if ( + data.get("evaluation_strategy") + and data.get("eval_steps") + and data.get("evaluation_strategy") != "steps" + ): + raise ValueError( + "evaluation_strategy and eval_steps mismatch. Please set evaluation_strategy to 'steps' or remove eval_steps." + ) + + if ( + data.get("val_set_size") == 0 + and (data.get("eval_steps") or data.get("evaluation_strategy")) + and not data.get("test_datasets") + ): + raise ValueError( + "eval_steps and evaluation_strategy are not supported with val_set_size == 0" + ) + if data.get("evals_per_epoch") and data.get("eval_steps"): + raise ValueError( + "eval_steps and evals_per_epoch are mutually exclusive and cannot be used together." + ) + if ( + data.get("evals_per_epoch") + and data.get("evaluation_strategy") + and data.get("evaluation_strategy") != "steps" + ): + raise ValueError( + "evaluation_strategy must be empty or set to `steps` when used with evals_per_epoch." 
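+                # e.g. `evals_per_epoch: 4` with `evaluation_strategy: epoch` is rejected here;
+                # leave `evaluation_strategy` unset (or set it to `steps`) when using evals_per_epoch.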
+ ) + + return data + + @model_validator(mode="before") + @classmethod + def check_eval_packing(cls, data): + if ( + data.get("sample_packing") + and data.get("eval_table_size") + and data.get("eval_sample_packing") is not False + ): + raise ValueError( + "eval_table_size and eval_sample_packing are not supported together with sample_packing. Please set 'eval_sample_packing' to false." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_warmup(cls, data): + if data.get("warmup_steps") and data.get("warmup_ratio"): + raise ValueError("warmup_steps and warmup_ratio are mutually exclusive") + return data + + @model_validator(mode="before") + @classmethod + def check_neftune(cls, data): + if data.get("noisy_embedding_alpha") and not data.get("neftune_noise_alpha"): + data["neftune_noise_alpha"] = data["noisy_embedding_alpha"] + del data["noisy_embedding_alpha"] + elif data.get("noisy_embedding_alpha") and not data.get("neftune_noise_alpha"): + raise ValueError( + "noisy_embedding_alpha is deprecated, use neftune_noise_alpha; both are set, please remove the deprecated noisy_embedding_alpha setting" + ) + return data + + @field_validator("neftune_noise_alpha") + @classmethod + def validate_neftune_noise_alpha(cls, neftune_noise_alpha): + if neftune_noise_alpha is not None and neftune_noise_alpha <= 0.0: + raise ValueError("neftune_noise_alpha must be > 0.0") + return neftune_noise_alpha + + @model_validator(mode="before") + @classmethod + def check_frozen(cls, data): + if ( + data.get("adapter") + and data.get("peft_layers_to_transform") + and data.get("unfrozen_parameters") + ): + raise ValueError( + "`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior." + ) + + return data + + @model_validator(mode="after") + def check_fft_possible_bad_config(self): + if ( + # pylint: disable=too-many-boolean-expressions + not (self.bf16 or self.bfloat16) + and (self.fp16 or self.float16) + and not self.adapter + and not self.flash_attention + and self.sample_packing + ): + LOG.warning( + "Full fine tune w/o FA2 w/ sample packing and fp16/float16 is likely to raise errors. Try LoRA." + ) + # ValueError: Attempting to unscale FP16 gradients. + # OR + # RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half + return self + + @model_validator(mode="after") + def check_fused_lora(self): + if self.adapter in ["lora", "qlora"] and ( + self.flash_attn_fuse_qkv or self.flash_attn_fuse_mlp + ): + raise ValueError("Fused modules are not supported with LoRA/QLoRA") + return self + + @model_validator(mode="after") + def hint_lora_8bit(self): + loftq = ( + self.peft and self.peft.loftq_config and self.peft.loftq_config.loftq_bits + ) + if not self.load_in_8bit and self.adapter == "lora" and not loftq: + LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning") + return self + + @model_validator(mode="after") + def check_early_stopping(self): + if self.early_stopping_patience: + if not self.save_steps or not self.eval_steps: + raise ValueError( + "`early_stopping_patience` requires save_steps and eval_steps to be set. eval_steps should evenly divide save_steps." + ) + if self.save_steps % self.eval_steps != 0: + raise ValueError( + "`early_stopping_patience` requires that eval_steps should evenly divide save_steps." 
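+                    # e.g. `eval_steps: 25` with `save_steps: 100` is accepted (100 % 25 == 0),
+                    # while `eval_steps: 30` with `save_steps: 100` would fail this check.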
+ ) + return self + + @model_validator(mode="after") + def check_relora(self): + if self.relora_steps: + if self.adapter not in ("lora", "qlora"): + raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA") + + if self.fsdp: + raise ValueError("fsdp not supported with ReLoRA") + + if self.deepspeed: + raise ValueError("deepspeed not supported with ReLoRA") + + if self.lr_scheduler == "one_cycle": + raise ValueError( + "ReLoRA is not compatible with the one_cycle scheduler" + ) + + if self.flash_attn_fuse_qkv or self.flash_attn_fuse_mlp: + raise ValueError("Fused modules are not supported with ReLoRA") + return self + + @model_validator(mode="before") + @classmethod + def check_mem_mismatch(cls, data): + if ( + data.get("max_memory") is not None + and data.get("gpu_memory_limit") is not None + ): + raise ValueError( + "max_memory and gpu_memory_limit are mutually exclusive and cannot be used together." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_use_reentrant_mismatch(cls, data): + if ( + data.get("unfrozen_parameters") + and data.get("gradient_checkpointing_kwargs") + and data.get("gradient_checkpointing_kwargs", {}).get("use_reentrant") + is True + ): + # https://github.com/huggingface/transformers/issues/21381 + raise ValueError( + "`use_reentrant` must be false when used with partially frozen model." + ) + return data + + @model_validator(mode="before") + @classmethod + def check_val_w_test_datasets(cls, data): + if data.get("test_datasets") and data.get("val_set_size"): + raise ValueError( + "non-zero val_set_size should not be used with test_datasets configuration" + ) + return data + + @model_validator(mode="before") + @classmethod + def check_fsdp_w_8bit_optimizer(cls, data): + if data.get("fsdp") and "bnb" in data.get("optimizer", ""): + raise ValueError(f"FSDP not compatible with {data.get('optimizer')}") + return data + + @model_validator(mode="before") + @classmethod + def check_causal_lm_evals(cls, data): + if data.get("do_causal_lm_eval") and data.get("eval_sample_packing"): + raise ValueError( + "do_causal_lm_eval is enabled, eval_sample_packing must be set to False" + ) + + if data.get("eval_causal_lm_metrics"): + supported_metrics = ["sacrebleu", "comet", "ter", "chrf"] + if not isinstance(data.get("eval_causal_lm_metrics"), list): + raise ValueError("eval_causal_lm_metrics must be a list") + # only ["sacrebleu", "comet", "ter", "chrf"] supported + if set(data.get("eval_causal_lm_metrics")) - set(supported_metrics): + raise ValueError( + f"eval_causal_lm_metrics must be one of {supported_metrics}" + ) + return data + + @model_validator(mode="before") + @classmethod + def check_dataset_or_pretraining_dataset(cls, data): + if data.get("datasets") is None and data.get("pretraining_dataset") is None: + raise ValueError("either datasets or pretraining_dataset is required") + return data + + +class AxolotlConfigWCapabilities(AxolotlInputConfig): + """wrapper to valdiate gpu capabilities with the configured options""" + + capabilities: GPUCapabilities + + @model_validator(mode="after") + def check_bf16(self): + if self.capabilities.bf16: + if not self.bf16 and not self.bfloat16: + LOG.info( + "bf16 support detected, but not enabled for this configuration." + ) + else: + if ( + not self.merge_lora + and not self.is_preprocess + and (self.bf16 is True or self.bfloat16 is True) + ): + raise ValueError( + "bf16 requested, but AMP is not supported on this GPU. Requires Ampere series or above." 
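+                    # bf16 autocast requires compute capability >= 8.0 (Ampere or newer);
+                    # on older GPUs use `fp16: true` (or `float16: true` for the non-AMP case) instead.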
+ ) + return self + + @model_validator(mode="before") + @classmethod + def check_sample_packing_w_sdpa_bf16(cls, data): + is_sm_90: bool = ( + data["capabilities"] + and data["capabilities"].get("compute_capability") == "sm_90" + ) + if ( + data.get("sample_packing") + and data.get("sdp_attention") + and (data.get("bfloat16") or data.get("bf16")) + and not is_sm_90 + ): + # https://github.com/pytorch/pytorch/blob/1b03423526536b5f3d35bdfa95ccc6197556cf9b/test/test_transformers.py#L2440-L2450 + LOG.warning( + "sample_packing & torch sdpa with bf16 is unsupported may results in 0.0 loss. " + "This may work on H100s." + ) + + return data + + @model_validator(mode="before") + @classmethod + def check_fsdp_deepspeed(cls, data): + if data.get("deepspeed") and data.get("fsdp"): + raise ValueError("deepspeed and fsdp cannot be used together.") + return data diff --git a/src/axolotl/utils/config/models/internals/__init__.py b/src/axolotl/utils/config/models/internals/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..dd742caf45c1a257fd52908e6e6e25aeaaea9c6a --- /dev/null +++ b/src/axolotl/utils/config/models/internals/__init__.py @@ -0,0 +1,14 @@ +"""module for gpu capabilities""" +from typing import Optional + +from pydantic import BaseModel, Field + + +class GPUCapabilities(BaseModel): + """model to manage the gpu capabilities statically""" + + bf16: bool = Field(default=False) + fp8: bool = Field(default=False) + n_gpu: int = Field(default=1) + n_node: int = Field(default=1) + compute_capability: Optional[str] = Field(default=None) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py new file mode 100644 index 0000000000000000000000000000000000000000..9e0049e659621c2088c81de5aa11bfad0585917f --- /dev/null +++ b/src/axolotl/utils/data.py @@ -0,0 +1,983 @@ +"""Module containing data utilities""" +import functools +import hashlib +import logging +from collections import defaultdict +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +import torch +import yaml +from datasets import ( + Dataset, + DatasetDict, + concatenate_datasets, + load_dataset, + load_from_disk, +) +from huggingface_hub import hf_hub_download +from huggingface_hub.utils import HFValidationError +from torch.utils.data import RandomSampler +from transformers import PreTrainedTokenizerBase + +from axolotl.common.const import DEFAULT_DATASET_PREPARED_PATH +from axolotl.datasets import TokenizedPromptDataset +from axolotl.prompt_strategies import load +from axolotl.prompt_strategies.dpo import load as load_dpo +from axolotl.prompt_tokenizers import ( + AlpacaMultipleChoicePromptTokenizingStrategy, + AlpacaPromptTokenizingStrategy, + AlpacaReflectionPTStrategy, + GPTeacherPromptTokenizingStrategy, + JeopardyPromptTokenizingStrategy, + OpenAssistantPromptTokenizingStrategy, + SummarizeTLDRPromptTokenizingStrategy, +) +from axolotl.prompters import ( + AlpacaPrompter, + GPTeacherPrompter, + JeopardyPrompter, + MultipleChoiceConcisePrompter, + MultipleChoiceExplainPrompter, + Prompter, + ReflectAlpacaPrompter, + SummarizeTLDRPrompter, + UnsupportedPrompter, +) +from axolotl.utils.collators import PretrainingBatchSamplerDataCollatorForSeq2Seq +from axolotl.utils.dict import DictDefault +from axolotl.utils.distributed import is_main_process, zero_first +from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths +from axolotl.utils.trainer import ( + calculate_total_num_steps, + process_datasets_for_packing, + 
process_pretraining_datasets_for_packing, +) + +LOG = logging.getLogger("axolotl") + + +def md5(to_hash: str, encoding: str = "utf-8") -> str: + try: + return hashlib.md5(to_hash.encode(encoding), usedforsecurity=False).hexdigest() + except TypeError: + return hashlib.md5(to_hash.encode(encoding)).hexdigest() # nosec + + +def prepare_dataset(cfg, tokenizer): + prompters = [] + if not cfg.pretraining_dataset: + with zero_first(is_main_process()): + if cfg.test_datasets: + train_dataset, _, prompters = load_prepare_datasets( + tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH, split="train" + ) + _, eval_dataset, _ = load_prepare_datasets( + tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH, split="test" + ) + else: + train_dataset, eval_dataset, prompters = load_prepare_datasets( + tokenizer, cfg, DEFAULT_DATASET_PREPARED_PATH + ) + else: + path = cfg.pretraining_dataset + name = None + if isinstance(cfg.pretraining_dataset, list) and isinstance( + cfg.pretraining_dataset[0], dict + ): + path = cfg.pretraining_dataset[0]["path"] + name = cfg.pretraining_dataset[0]["name"] + + ds_wrapper_partial = functools.partial( + get_dataset_wrapper, + cfg.pretraining_dataset[0], + tokenizer, + cfg, + cfg.pretraining_dataset[0]["type"] or "pretrain", + ) + + train_dataset = wrap_pretraining_dataset( + load_dataset(path, streaming=True, split="train", name=name), + tokenizer, + cfg, + ds_wrapper_partial, + max_tokens=cfg.sequence_len, + batch_size=cfg.micro_batch_size, + seed=cfg.seed or 42, + ) + # https://discuss.huggingface.co/t/how-to-use-huggingface-trainer-streaming-datasets-without-wrapping-it-with-torchdatas-iterablewrapper/25230 + train_dataset = train_dataset.with_format("torch") + eval_dataset = None + return train_dataset, eval_dataset, cfg.max_steps, prompters + + if eval_dataset and cfg.sample_packing and cfg.eval_sample_packing is not False: + total_eval_steps = calculate_total_num_steps(cfg, eval_dataset, update=False) + if total_eval_steps == 0: + raise ValueError( + "eval dataset split is too small for sample_packing. You should set `eval_sample_packing: False`. " + ) + + if cfg.max_steps: + total_num_steps = min( + calculate_total_num_steps(cfg, train_dataset), cfg.max_steps + ) + LOG.info(f"Maximum number of steps set at {total_num_steps}") + else: + total_num_steps = calculate_total_num_steps(cfg, train_dataset) + return train_dataset, eval_dataset, total_num_steps, prompters + + +def load_tokenized_prepared_datasets( + tokenizer, + cfg, + default_dataset_prepared_path, + split="train", +) -> Tuple[DatasetDict, List[Prompter]]: + cfg_datasets = cfg.test_datasets if split == "test" else cfg.datasets + tokenizer_name = tokenizer.__class__.__name__ + ds_hash = str( + md5( + ( + str(cfg.sequence_len) + + "@" + + str(cfg.sample_packing) + + "@" + + str(cfg.eval_sample_packing) + + "@" + + str(cfg.group_by_length) + + "@" + + "|".join( + sorted( + [ + f"{d.path}:{d.type}:{d.shards}:{d.conversation}{d.split}" + for d in cfg_datasets + ] + ) + ) + + "|" + + tokenizer_name + ) + ) + ) + prepared_ds_path = ( + Path(cfg.dataset_prepared_path) / ds_hash + if cfg.dataset_prepared_path + else Path(default_dataset_prepared_path) / ds_hash + ) + dataset = None + prompters = [] + use_auth_token = cfg.hf_use_auth_token + try: + if cfg.push_dataset_to_hub: + dataset = load_dataset( + f"{cfg.push_dataset_to_hub}/{ds_hash}", + token=use_auth_token, + ) + dataset = dataset[split] + except Exception: # pylint: disable=broad-except # nosec + pass + + if dataset: + ... 
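+        # a prepared dataset found on the hub above is used as-is; otherwise fall back to a
+        # locally prepared copy on disk, and only rebuild from the raw datasets as a last resort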
+ elif ( + cfg.dataset_prepared_path + and any(prepared_ds_path.glob("*")) + and not cfg.is_preprocess + ): + LOG.info(f"Loading prepared dataset from disk at {prepared_ds_path}...") + dataset = load_from_disk(str(prepared_ds_path)) + LOG.info("Prepared dataset loaded from disk...") + else: + LOG.info(f"Unable to find prepared dataset in {prepared_ds_path}") + LOG.info("Loading raw datasets...") + if not cfg.is_preprocess: + LOG.warning( + "Processing datasets during training can lead to VRAM instability. Please pre-process your dataset." + ) + + if cfg.seed: + seed = cfg.seed + else: + LOG.info("No seed provided, using default seed of 42") + seed = 42 + + datasets = [] + + def for_d_in_datasets(dataset_configs): + for dataset in dataset_configs: + if dataset.name and isinstance(dataset.name, list): + for name in dataset.name: + yield DictDefault({**dataset, "name": name}) + else: + yield dataset + + # pylint: disable=invalid-name + for config_dataset in for_d_in_datasets(cfg_datasets): + ds: Optional[Union[Dataset, DatasetDict]] = None + ds_from_hub = False + try: + load_dataset( + config_dataset.path, + name=config_dataset.name, + streaming=True, + token=use_auth_token, + ) + ds_from_hub = True + except (FileNotFoundError, ConnectionError, HFValidationError): + pass + + ds_from_cloud = False + storage_options = {} + remote_file_system = None + if config_dataset.path.startswith("s3://"): + try: + import aiobotocore.session # type: ignore + import s3fs # type: ignore + except ImportError as exc: + raise ImportError( + "s3:// paths require aiobotocore and s3fs to be installed" + ) from exc + + # Takes credentials from ~/.aws/credentials for default profile + s3_session = aiobotocore.session.AioSession(profile="default") + storage_options = {"session": s3_session} + remote_file_system = s3fs.S3FileSystem(**storage_options) + elif config_dataset.path.startswith( + "gs://" + ) or config_dataset.path.startswith("gcs://"): + try: + import gcsfs # type: ignore + except ImportError as exc: + raise ImportError( + "gs:// or gcs:// paths require gcsfs to be installed" + ) from exc + + # gcsfs will use default credentials from the environment else anon + # https://gcsfs.readthedocs.io/en/latest/#credentials + storage_options = {"token": None} + remote_file_system = gcsfs.GCSFileSystem(**storage_options) + # TODO: Figure out how to get auth creds passed + # elif config_dataset.path.startswith("adl://") or config_dataset.path.startswith("abfs://"): + # try: + # import adlfs + # except ImportError as exc: + # raise ImportError( + # "adl:// or abfs:// paths require adlfs to be installed" + # ) from exc + + # # Gen 1 + # storage_options = { + # "tenant_id": TENANT_ID, + # "client_id": CLIENT_ID, + # "client_secret": CLIENT_SECRET, + # } + # # Gen 2 + # storage_options = { + # "account_name": ACCOUNT_NAME, + # "account_key": ACCOUNT_KEY, + # } + + # remote_file_system = adlfs.AzureBlobFileSystem(**storage_options) + try: + if remote_file_system and remote_file_system.exists( + config_dataset.path + ): + ds_from_cloud = True + except (FileNotFoundError, ConnectionError): + pass + + # prefer local dataset, even if hub exists + local_path = Path(config_dataset.path) + if local_path.exists(): + if local_path.is_dir(): + # TODO dirs with arrow or parquet files could be loaded with `load_from_disk` + ds = load_dataset( + config_dataset.path, + name=config_dataset.name, + data_files=config_dataset.data_files, + streaming=False, + split=None, + ) + elif local_path.is_file(): + ds_type = get_ds_type(config_dataset) 
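+                # a single local file is loaded with the builder inferred from its extension,
+                # e.g. .jsonl -> "json", .parquet -> "parquet", .csv -> "csv"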
+ + ds = load_dataset( + ds_type, + name=config_dataset.name, + data_files=config_dataset.path, + streaming=False, + split=None, + ) + else: + raise ValueError( + "unhandled dataset load: local path exists, but is neither a directory or a file" + ) + elif ds_from_hub: + ds = load_dataset( + config_dataset.path, + name=config_dataset.name, + streaming=False, + data_files=config_dataset.data_files, + token=use_auth_token, + ) + elif ds_from_cloud and remote_file_system: + if remote_file_system.isdir(config_dataset.path): + ds = load_from_disk( + config_dataset.path, + storage_options=storage_options, + ) + elif remote_file_system.isfile(config_dataset.path): + ds_type = get_ds_type(config_dataset) + ds = load_dataset( + ds_type, + name=config_dataset.name, + data_files=config_dataset.path, + streaming=False, + split=None, + storage_options=storage_options, + ) + elif config_dataset.path.startswith("https://"): + ds_type = get_ds_type(config_dataset) + ds = load_dataset( + ds_type, + name=config_dataset.name, + data_files=config_dataset.path, + streaming=False, + split=None, + storage_options=storage_options, + ) + else: + if isinstance(config_dataset.data_files, str): + fp = hf_hub_download( + repo_id=config_dataset.path, + repo_type="dataset", + filename=config_dataset.data_files, + ) + elif isinstance(config_dataset.data_files, list): + fp = [] + for file in config_dataset.data_files: + fp.append( + hf_hub_download( + repo_id=config_dataset.path, + repo_type="dataset", + filename=file, + ) + ) + else: + raise ValueError( + "data_files must be either a string or list of strings" + ) + ds = load_dataset( + "json", + name=config_dataset.name, + data_files=fp, + streaming=False, + split=None, + ) + if not ds: + raise ValueError("unhandled dataset load") + + d_base_type = d_prompt_style = None + d_type = config_dataset.type + if isinstance(d_type, str): + d_type_split = d_type.split(":") + d_base_type = d_type_split[0] + d_prompt_style = d_type_split[1] if len(d_type_split) > 1 else None + + if config_dataset.split and config_dataset.split in ds: + ds = ds[config_dataset.split] + elif split in ds: + ds = ds[split] + elif isinstance(ds, DatasetDict): + raise ValueError( + f"no {split} split found for dataset {config_dataset.path}, you may specify a split with 'split: `" + ) + + # support for using a subset of the data + if config_dataset.shards: + shards_idx = config_dataset.get("shards_idx", 0) + ds = ds.shuffle(seed=seed).shard( + num_shards=config_dataset.shards, index=shards_idx + ) + + dataset_wrapper, dataset_prompter = get_dataset_wrapper( + config_dataset=config_dataset, + tokenizer=tokenizer, + cfg=cfg, + dataset=ds, + d_base_type=d_base_type, + d_prompt_style=d_prompt_style, + ) + datasets.append(dataset_wrapper) + prompters.append(dataset_prompter) + + LOG.info("merging datasets") + dataset = concatenate_datasets(datasets) + + if len(datasets) > 1: + if cfg.shuffle_merged_datasets: + LOG.debug("shuffle merged datasets") + dataset = dataset.shuffle(seed=seed) + else: + LOG.debug("NOT shuffling merged datasets") + + dataset, _ = process_datasets_for_packing(cfg, dataset, None) + + if cfg.local_rank == 0: + LOG.info(f"Saving merged prepared dataset to disk... {prepared_ds_path}") + dataset.save_to_disk(prepared_ds_path) + if cfg.push_dataset_to_hub: + LOG.info( + f"Saving merged prepared dataset with push_to_hub... 
{cfg.push_dataset_to_hub}/{ds_hash}" + ) + dataset.push_to_hub( + f"{cfg.push_dataset_to_hub}/{ds_hash}", private=True + ) + + return dataset, prompters + + +def get_ds_type(config_dataset: DictDefault): + """ + Get the dataset type from the path if it's not specified + """ + ds_type = "json" + if config_dataset.ds_type: + ds_type = config_dataset.ds_type + elif ".parquet" in config_dataset.path: + ds_type = "parquet" + elif ".arrow" in config_dataset.path: + ds_type = "arrow" + elif ".csv" in config_dataset.path: + ds_type = "csv" + elif ".txt" in config_dataset.path: + ds_type = "text" + return ds_type + + +def load_prepare_datasets( + tokenizer: PreTrainedTokenizerBase, + cfg, + default_dataset_prepared_path, + split="train", +) -> Tuple[Dataset, Dataset, List[Prompter]]: + dataset, prompters = load_tokenized_prepared_datasets( + tokenizer, cfg, default_dataset_prepared_path, split=split + ) + + if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None: + LOG.info( + f"Using index #{cfg.dataset_shard_idx} of {cfg.dataset_shard_num} shards" + ) + dataset = dataset.shard( + num_shards=cfg.dataset_shard_num, + index=cfg.dataset_shard_idx, + ) + + if split == "train" and cfg.val_set_size: + # ensure we end up with the same fingerprint by doing rank0 first and being able to cache + to_hash_train = ( + dataset._fingerprint # pylint: disable=protected-access + + "|" + + str(cfg.val_set_size) + + "|" + + "train" + + "|" + + str(cfg.seed or 42) + ) + to_hash_test = ( + dataset._fingerprint # pylint: disable=protected-access + + "|" + + str(cfg.val_set_size) + + "|" + + "test" + + "|" + + str(cfg.seed or 42) + ) + train_fingerprint = md5(to_hash_train) + test_fingerprint = md5(to_hash_test) + + dataset = dataset.train_test_split( + test_size=cfg.val_set_size, + shuffle=False, + seed=cfg.seed or 42, + train_new_fingerprint=train_fingerprint, + test_new_fingerprint=test_fingerprint, + ) + + train_dataset = dataset["train"] + eval_dataset = dataset["test"] + elif split == "test": + train_dataset = None + eval_dataset = dataset + else: + train_dataset = dataset + eval_dataset = None + + return train_dataset, eval_dataset, prompters + + +def get_dataset_wrapper( + config_dataset, + tokenizer, + cfg, + d_base_type, + dataset, + d_prompt_style=None, +): + dataset_wrapper = None + dataset_prompter = None + + ds_kwargs = { + "process_count": cfg.dataset_processes, + "keep_in_memory": cfg.dataset_keep_in_memory is True, + } + + if ( + isinstance(dataset, Dataset) + and "input_ids" in dataset.features + and "attention_mask" in dataset.features + and "labels" in dataset.features + ): + # dataset is already tokenized, just drop it straight in + dataset_prompter = UnsupportedPrompter() + dataset_wrapper = dataset + elif isinstance(config_dataset.type, DictDefault): + ds_strategy = load( + "user_defined", tokenizer, cfg, config_dataset.type.to_dict() + ) + dataset_prompter = UnsupportedPrompter() + dataset_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + elif ds_strategy := load(config_dataset.type, tokenizer, cfg, config_dataset): + dataset_prompter = UnsupportedPrompter() + dataset_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + elif d_base_type == "alpaca": + dataset_prompter = AlpacaPrompter(d_prompt_style) + ds_strategy = AlpacaPromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = 
ds_wrapper + elif d_base_type == "explainchoice": + dataset_prompter = MultipleChoiceExplainPrompter(d_prompt_style) + ds_strategy = AlpacaMultipleChoicePromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "concisechoice": + dataset_prompter = MultipleChoiceConcisePrompter(d_prompt_style) + ds_strategy = AlpacaMultipleChoicePromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "summarizetldr": + dataset_prompter = SummarizeTLDRPrompter(d_prompt_style) + ds_strategy = SummarizeTLDRPromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "jeopardy": + dataset_prompter = JeopardyPrompter(d_prompt_style) + ds_strategy = JeopardyPromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "oasst": + dataset_prompter = AlpacaPrompter(d_prompt_style) + ds_strategy = OpenAssistantPromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "gpteacher": + dataset_prompter = GPTeacherPrompter(d_prompt_style) + ds_strategy = GPTeacherPromptTokenizingStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + elif d_base_type == "reflection": + dataset_prompter = ReflectAlpacaPrompter(d_prompt_style) + ds_strategy = AlpacaReflectionPTStrategy( + dataset_prompter, + tokenizer, + cfg.train_on_inputs, + cfg.sequence_len, + ) + ds_wrapper = TokenizedPromptDataset( + ds_strategy, + dataset, + **ds_kwargs, + ) + dataset_wrapper = ds_wrapper + else: + suffix = "" + if ":load_" in config_dataset.type: + suffix = f" Did you mean {config_dataset.type.replace(':load_', '.load_')}?" + LOG.error( + f"unhandled prompt tokenization strategy: {config_dataset.type}. 
{suffix}" + ) + raise ValueError( + f"unhandled prompt tokenization strategy: {config_dataset.type} {suffix}" + ) + + return dataset_wrapper, dataset_prompter + + +def encode_pretraining( + tokenizer: PreTrainedTokenizerBase, max_tokens: int, examples: List[str] +) -> Dict[str, List]: + res = tokenizer( + examples, + truncation=True, + max_length=max_tokens - 2, + add_special_tokens=True, + ) + # Convert to PyTorch tensors + input_ids = [torch.tensor(seq) for seq in res["input_ids"]] + attention_mask = [torch.tensor(seq) for seq in res["attention_mask"]] + new_input_ids = [] + new_attention_mask = [] + # Append EOS and PAD tokens to input_ids, and correct attention_mask + for i, _ in enumerate(input_ids): + input_ids[i] = torch.cat( + ( + input_ids[i], + torch.tensor([tokenizer.eos_token_id, tokenizer.pad_token_id]), + ), + dim=0, + ) + attention_mask[i] = torch.cat((attention_mask[i], torch.tensor([1, 0])), dim=0) + + # Concatenate tokens so that their lengths are less than max_tokens + buffer_input_ids = torch.tensor([], dtype=torch.long) + buffer_attention_mask = torch.tensor([], dtype=torch.long) + + for ids, mask in zip(input_ids, attention_mask): + if buffer_input_ids.numel() == max_tokens: + new_input_ids.append(buffer_input_ids) + new_attention_mask.append(buffer_attention_mask) + buffer_input_ids = torch.tensor([], dtype=torch.long) + buffer_attention_mask = torch.tensor([], dtype=torch.long) + buffer_input_ids = torch.cat((buffer_input_ids, ids), dim=0) + buffer_attention_mask = torch.cat((buffer_attention_mask, mask), dim=0) + elif buffer_input_ids.numel() + ids.numel() <= max_tokens: + buffer_input_ids = torch.cat((buffer_input_ids, ids), dim=0) + buffer_attention_mask = torch.cat((buffer_attention_mask, mask), dim=0) + else: + buffer_input_ids = torch.cat( + ( + buffer_input_ids, + torch.full( + (max_tokens - buffer_input_ids.numel(),), + tokenizer.pad_token_id, + dtype=torch.long, + ), + ), + dim=0, + ) + buffer_attention_mask = torch.cat( + ( + buffer_attention_mask, + torch.full( + (max_tokens - buffer_attention_mask.numel(),), + 0, + dtype=torch.long, + ), + ), + dim=0, + ) + new_input_ids.append(buffer_input_ids) + new_attention_mask.append(buffer_attention_mask) + buffer_input_ids = torch.tensor([], dtype=torch.long) + buffer_attention_mask = torch.tensor([], dtype=torch.long) + + buffer_input_ids = torch.cat((buffer_input_ids, ids), dim=0) + buffer_attention_mask = torch.cat((buffer_attention_mask, mask), dim=0) + + if buffer_input_ids.numel() > 0: # for any leftover tokens + while buffer_input_ids.numel() < max_tokens: # make all sequences equal in size + buffer_input_ids = torch.cat( + ( + buffer_input_ids, + torch.full( + (max_tokens - buffer_input_ids.numel(),), + tokenizer.pad_token_id, + dtype=torch.long, + ), + ), + dim=0, + ) + buffer_attention_mask = torch.cat( + ( + buffer_attention_mask, + torch.full( + (max_tokens - buffer_attention_mask.numel(),), + 0, + dtype=torch.long, + ), + ), + dim=0, + ) + new_input_ids.append(buffer_input_ids) + new_attention_mask.append(buffer_attention_mask) + + ret = { + "input_ids": [seq.tolist() for seq in new_input_ids], + "labels": [seq.tolist() for seq in new_input_ids], + "attention_mask": [seq.tolist() for seq in new_attention_mask], + } + + LOG.debug(len(ret["input_ids"])) + return ret + + +def wrap_pretraining_dataset( + dataset, + tokenizer, + cfg, + ds_wrapper_fn, + max_tokens=2048, + batch_size=1, + seed=42, + buffer_size=10_000, +): + if cfg.sample_packing: + collate_fn = 
PretrainingBatchSamplerDataCollatorForSeq2Seq( + tokenizer, + return_tensors="pt", + padding=True, + pad_to_multiple_of=max_tokens * batch_size, + ) + encode = functools.partial( + encode_packed_pretraining, + collate_fn, + ds_wrapper_fn, + max_seq_length=max_tokens, + batch_size=batch_size, + ) + # set this to 1 so downstream data_loader doesn't try to increase the batch again + cfg.micro_batch_size = 1 + else: + encode = functools.partial(encode_pretraining, tokenizer, max_tokens) + + if cfg.shuffle_merged_datasets: + dataset = dataset.shuffle(seed=seed, buffer_size=buffer_size) + else: + LOG.debug("NOT shuffling merged pretraining datasets") + + dataset = dataset.map( + encode, + batched=True, + batch_size=buffer_size, + # input_columns="text", + # remove all the existing columns after mapping since they end up having + # a different length than the encoded/tokenized column + remove_columns=dataset.features.keys(), + ) + return dataset + + +def encode_packed_pretraining( + collate_fn, + ds_wrapper: Callable, + examples: Dict[str, List], + max_seq_length: int = 2048, + batch_size: int = 4, +) -> Dict[str, List]: + # pylint: disable=duplicate-code + # tokenize all the examples + # rows get split with stride (overlap) + train_dataset = ds_wrapper(Dataset.from_dict(examples))[0] + + train_dataset = process_pretraining_datasets_for_packing( + train_dataset, max_seq_length + ) + + sampler = MultipackBatchSampler( + RandomSampler(train_dataset), + batch_size=1, + drop_last=True, + batch_max_len=batch_size * max_seq_length, + lengths=get_dataset_lengths(train_dataset), + ) + + chunked_data = defaultdict(list) + + for batch in sampler: + for data in batch: + features = train_dataset[data] + if "num_truncated_tokens" in features: + del features["num_truncated_tokens"] + if "num_truncated_tokens" in features: + del features["num_truncated_tokens"] + if "overflow_to_sample_mapping" in features: + del features["overflow_to_sample_mapping"] + if "labels" not in features: + features["labels"] = features["input_ids"].copy() + collated_features = collate_fn(features) + + for feature in features.keys(): + if feature == "length": + continue + chunked_data[feature].append(collated_features[feature].squeeze(0)) + + return chunked_data + + +def _get_path(ds_hash, cfg): + prepared_ds_path = ( + Path(cfg.dataset_prepared_path) / ds_hash + if cfg.dataset_prepared_path + else Path(DEFAULT_DATASET_PREPARED_PATH) / ds_hash + ) + + return prepared_ds_path + + +def _load_preprocessed_ds(cfg, sub_cfg): + ds_hash = md5(yaml.dump(sub_cfg, Dumper=yaml.Dumper)) + prepared_ds_path = _get_path(ds_hash, cfg) + dataset = None + + if ( + cfg.dataset_prepared_path + and any(prepared_ds_path.glob("*")) + and not cfg.is_preprocess + ): + LOG.info(f"Loading prepared dataset from disk at {prepared_ds_path}...") + dataset = load_from_disk(str(prepared_ds_path)) + + return dataset + + +def _save_preprocessed_ds(cfg, sub_cfg, dataset): + ds_hash = md5(yaml.dump(sub_cfg, Dumper=yaml.Dumper)) + prepared_ds_path = _get_path(ds_hash, cfg) + + if cfg.is_preprocess and is_main_process(): + LOG.info(f"Loading prepared dataset from disk at {prepared_ds_path}...") + dataset.save_to_disk(str(prepared_ds_path)) + + +def load_prepare_dpo_datasets(cfg): + def load_split(dataset_cfgs, _cfg): + split_datasets: List[Any] = [] + for i, ds_cfg in enumerate(dataset_cfgs): + if ds_cfg["ds_type"] == "json": + for data_file in ds_cfg["data_files"]: + data_files = {ds_cfg["split"]: data_file} + ds = load_dataset( # pylint: disable=invalid-name + "json", + 
data_files=data_files, + split=ds_cfg["split"], + ) + split_datasets.insert(i, ds) + else: + ds = load_dataset( # pylint: disable=invalid-name + ds_cfg["path"], + split=ds_cfg["split"], + ) + split_datasets.insert(i, ds) + + for i, data_set in enumerate(split_datasets): + _type = dataset_cfgs[i]["type"] + if _type: + if isinstance(_type, DictDefault): + _type = "user_defined.default" + ds_transform_fn = load_dpo(_type, _cfg, dataset_idx=i) + split_datasets[i] = data_set.map( + ds_transform_fn, + desc="Mapping RL Dataset", + ) + else: + # If no `type` is provided, assume the dataset is already in the expected format with + # "prompt", "chosen" and "rejected" already preprocessed + split_datasets[i] = data_set + + return concatenate_datasets(split_datasets) + + with zero_first(is_main_process()): + train_is_preprocessed = False + eval_is_preprocessed = False + if train_dataset := _load_preprocessed_ds(cfg, cfg.datasets): + train_is_preprocessed = True + else: + train_dataset = load_split(cfg.datasets, cfg) + + eval_dataset = None + if cfg.test_datasets: + if eval_dataset := _load_preprocessed_ds(cfg, cfg.test_datasets): + eval_is_preprocessed = True + else: + eval_dataset = load_split(cfg.test_datasets, cfg) + if not eval_dataset: + eval_dataset = None + + if not train_is_preprocessed: + _save_preprocessed_ds(cfg, cfg.datasets, train_dataset) + if eval_dataset and not eval_is_preprocessed: + _save_preprocessed_ds(cfg, cfg.test_datasets, eval_dataset) + + return train_dataset, eval_dataset diff --git a/src/axolotl/utils/dict.py b/src/axolotl/utils/dict.py new file mode 100644 index 0000000000000000000000000000000000000000..409d088e6d2c6d63ce7d1b323c4c0fcf344ab143 --- /dev/null +++ b/src/axolotl/utils/dict.py @@ -0,0 +1,15 @@ +"""Module containing the DictDefault class""" + +from addict import Dict + + +class DictDefault(Dict): + """ + A Dict that returns None instead of returning empty Dict for missing keys. + """ + + def __missing__(self, key): + return None + + def __or__(self, other): + return DictDefault(super().__ror__(other)) diff --git a/src/axolotl/utils/distributed.py b/src/axolotl/utils/distributed.py new file mode 100644 index 0000000000000000000000000000000000000000..313dd24e8c18b0a6a3797677653f139902e8a3e4 --- /dev/null +++ b/src/axolotl/utils/distributed.py @@ -0,0 +1,230 @@ +""" +utility helpers for distributed checks +""" +import os +import pickle # nosec +from contextlib import contextmanager + +import torch +import torch.distributed as dist +from accelerate import Accelerator + +accelerate = None # pylint: disable=invalid-name + + +def load_accelerate(): + global accelerate # pylint: disable=global-statement + accelerate = Accelerator() + + +def is_distributed(): + """ + Check if distributed training is initialized. + """ + global accelerate # pylint: disable=global-statement + if not accelerate: + accelerate = Accelerator() + return dist.is_available() and dist.is_initialized() + + +def barrier(): + """ + Acts as a barrier to wait for all processes. This ensures that all processes + reach the barrier before proceeding further. + """ + if is_distributed(): + dist.barrier() + + +def is_main_process(): + """ + Check if the current process is the main process. + If not in distributed mode, always return True. + """ + if not is_distributed(): + return True + return dist.get_rank() == 0 + + +def get_world_size(): + return int(os.getenv("WORLD_SIZE", "1")) + + +@contextmanager +def zero_only(): + """ + Context manager that only runs the enclosed block on the main rank. 
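+    Note: the wrapped block itself still executes on every rank; use
+    ``is_main_process()`` inside the block to guard any rank-0-only side effects.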
+ """ + if is_main_process(): + yield + else: + yield None + + +@contextmanager +def zero_first(is_main): + """ + runs the wrapped context so that rank 0 runs first before other ranks + """ + if not is_main: # other ranks wait first + barrier() + yield + if is_main: # then rank 0 waits after it has run the context + barrier() + + +def gather_scalar_from_all_ranks(fn, world_size=1): # pylint: disable=invalid-name + """ + Run a callable 'fn' on all ranks and gather the results on the specified rank. + + Args: + - fn (callable): A function that computes the value. This should not have any side effects. + - rank (int, optional): The rank that gathers the values. Default is 0. + - world_size (int, optional): Total number of processes in the current distributed setup. + + Returns: + - A list of computed values from all ranks if on the gathering rank, otherwise None. + """ + value_scalar = fn() + if not is_distributed(): + return [value_scalar] + value_tensor = torch.tensor( + value_scalar, device=torch.cuda.current_device() + ).float() + + if not is_main_process(): + dist.gather(value_tensor, dst=0) + else: + gathered_tensors = [torch.zeros_like(value_tensor) for _ in range(world_size)] + dist.gather(value_tensor, gather_list=gathered_tensors, dst=0) + + # Convert tensors back to their original type (int or float) + gathered_values = [] + for tensor in gathered_tensors: + if tensor == tensor.int(): + gathered_values.append(int(tensor.item())) + else: + gathered_values.append(float(tensor.item())) + return gathered_values + return None + + +def broadcast_dict(vals: dict): + if not is_distributed(): + return vals + + if is_main_process(): + data_byte = pickle.dumps(vals) + data_tensor = torch.ByteTensor(list(data_byte)).to("cuda") + data_size = torch.IntTensor([len(data_byte)]).to("cuda") + else: + data_tensor = torch.empty([1024], dtype=torch.uint8, device="cuda") + data_size = torch.IntTensor([0]).to("cuda") + + dist.broadcast(data_size, 0) + if not is_main_process(): + # resize + data_tensor = data_tensor.new_empty([data_size.item()]) + + dist.broadcast(data_tensor, 0) + + if not is_main_process(): + data_list = data_tensor.cpu().tolist() + data_byte = bytes(data_list[: data_size.item()]) + vals = pickle.loads(data_byte) # nosec + + return vals + + +def compute_and_broadcast(fn): # pylint: disable=invalid-name + """ + Compute a value using the function 'fn' only on the specified rank (default is 0). + The value is then broadcasted to all other ranks. + + Args: + - fn (callable): A function that computes the value. This should not have any side effects. + - rank (int, optional): The rank that computes the value. Default is 0. + + Returns: + - The computed value (int or float). + """ + if is_main_process(): + value_scalar = fn() + value_tensor = torch.tensor( + value_scalar, device=torch.cuda.current_device() + ).float() + else: + value_tensor = torch.tensor( + 0.0, device=torch.cuda.current_device() + ) # Placeholder tensor + + # Broadcast the tensor to all processes. + barrier() + dist.broadcast(value_tensor, src=0) + + # Convert the tensor back to its original type (int or float) + if value_tensor == value_tensor.int(): + return int(value_tensor.item()) + return float(value_tensor.item()) + + +def gather_from_all_ranks(fn, world_size=1): # pylint: disable=invalid-name + """ + Run a callable 'fn' on all ranks and gather the results on the specified rank. + + Args: + - fn (callable): A function that computes the value. This should not have any side effects. 
+ - rank (int, optional): The rank that gathers the values. Default is 0. + - world_size (int, optional): Total number of processes in the current distributed setup. + + Returns: + - A list of computed values from all ranks if on the gathering rank, otherwise None. + """ + value_scalar = fn() + value_tensor = torch.tensor( + value_scalar, device=torch.cuda.current_device() + ).float() + + # Placeholder tensor for gathering results + if is_main_process(): + gathered_tensors = [torch.zeros_like(value_tensor) for _ in range(world_size)] + else: + gathered_tensors = None + + dist.gather(value_tensor, gather_list=gathered_tensors, dst=0) + + if is_main_process(): + # Convert tensors back to their original type (int or float) + gathered_values = [] + for tensor in gathered_tensors: + if tensor == tensor.int(): + gathered_values.append(int(tensor.item())) + else: + gathered_values.append(float(tensor.item())) + return gathered_values + return None + + +def reduce_and_broadcast(fn1, fn2): + """ + Run a callable 'fn1' on all ranks, gather the results, reduce them using 'fn2', + and then broadcast the reduced result to all ranks. + + Args: + - fn1 (callable): A function that computes the value on each rank. + - fn2 (callable): A reduction function that takes a list of values and returns a single value. + - world_size (int, optional): Total number of processes in the current distributed setup. + + Returns: + - The reduced and broadcasted value. + """ + + # Gather values from all ranks using fn1 + if not is_distributed(): + return fn2([fn1()]) + + gathered_values = gather_from_all_ranks(fn1, world_size=dist.get_world_size()) + + # Use compute_and_broadcast to compute the reduced value on the main process + # and then broadcast it to all ranks + return compute_and_broadcast(lambda: fn2(gathered_values)) diff --git a/src/axolotl/utils/freeze.py b/src/axolotl/utils/freeze.py new file mode 100644 index 0000000000000000000000000000000000000000..e3d0fd1446fe33246d3cf9b792d093e1b3e2c147 --- /dev/null +++ b/src/axolotl/utils/freeze.py @@ -0,0 +1,228 @@ +""" +module to freeze/unfreeze parameters by name +""" +import logging +import re +from typing import Callable, List, Tuple, Union + +from axolotl.utils.distributed import is_main_process + +LOG = logging.getLogger("axolotl.utils.freeze") + + +def freeze_layers_except(model, regex_patterns): + """ + Freezes all layers of the given model except for the layers that match given regex patterns. + Periods in the patterns are treated as literal periods, not as wildcard characters. + + Parameters: + - model (nn.Module): The PyTorch model to be modified. + - regex_patterns (list of str): List of regex patterns to match layer names to keep unfrozen. + Note that you cannot use a dot as a wildcard character in the patterns since it is reserved for separating layer names. + Also, to match the entire layer name, the pattern should start with "^" and end with "$", otherwise it will match any part of the layer name. + The range pattern part is optional and it is not compiled as a regex pattern which means you must put "$" before the range pattern if you want to match the entire layer name. + E.g., ["^model.embed_tokens.weight$[:32000]", "layers.2[0-9]+.block_sparse_moe.gate.[a-z]+$"] + + Returns: + None; the model is modified in place. 
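+
+    Example (layer names are illustrative, e.g. for a Llama-style model):
+        # unfreeze only the LM head and everything in transformer block 31
+        freeze_layers_except(model, ["lm_head", "model.layers.31."])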
+ """ + if isinstance(regex_patterns, str): + regex_patterns = [regex_patterns] + + patterns = [LayerNamePattern(pattern) for pattern in regex_patterns] + + # Unfreeze layers that match the regex patterns + for name, param in model.named_parameters(): + param.requires_grad = False + unfrozen_ranges = [] + for pattern in patterns: + if not pattern.match(name): + continue + + param.requires_grad = True + + if pattern.range is not None: + unfrozen_ranges.append(pattern.range) + + merged_unfrozen_ranges = _merge_ranges(unfrozen_ranges, len(param)) + + if param.requires_grad and is_main_process(): + unfrozen_ranges = ( + f" with ranges {merged_unfrozen_ranges}" + if merged_unfrozen_ranges + else "" + ) + LOG.debug(f"Unfrozen {name}{unfrozen_ranges}") + + if not merged_unfrozen_ranges: + continue + + # The range list we need is actually the inverted of the merged ranges + ranges_to_freeze = _invert_ranges(merged_unfrozen_ranges, len(param)) + + param.register_hook(_create_freeze_parameters_hook(ranges_to_freeze)) + + if is_main_process() and all( + not param.requires_grad for param in model.parameters() + ): + LOG.warning("All parameters are frozen. Model will not be trained.") + + +def _invert_ranges( + given_ranges: List[Tuple[int, int]], layer_size: int +) -> List[Tuple[int, int]]: + """ + Inverts a list of ranges to obtain the ranges not covered by the given ranges. + + Parameters: + - given_ranges (List[Tuple[int, int]]): List of ranges to invert. Each range is represented as a tuple of start (inclusive) and end (exclusive) indices. + - layer_size (int): The length of the layer. E.g., len(model.layer.weight) + Returns: + - List[Tuple[int, int]]: List of inverted ranges, where each range is represented as a tuple of start (inclusive) and end (exclusive) indices. + """ + if not given_ranges: + return [(0, layer_size)] + + inverted_ranges = [] + current_start = 0 + + for start, end in sorted(given_ranges): + if start > current_start: + inverted_ranges.append((current_start, start)) + current_start = max(current_start, end) + + # Handle the case where the last given range does not reach the end of the total_size + if current_start < layer_size: + inverted_ranges.append((current_start, layer_size)) + + return inverted_ranges + + +def _merge_ranges( + given_ranges: List[Tuple[int, Union[int, None]]], layer_size: int +) -> List[Tuple[int, int]]: + """ + Merges overlapping ranges and sorts the given ranges. + + This function takes a list of ranges and merges any overlapping ranges. The ranges are represented + as tuples, where the first element is the start index (inclusive) and the second element is the end + index (exclusive). The end index can be None, indicating that the range extends to the end of the + sequence. + + Parameters: + - given_ranges (List[Tuple[int, int | None]]): List of ranges to merge. + - layer_size (int): The length of the layer. E.g., len(model.layer.weight) + + Returns: + - List[Tuple[int, int]]: List of merged ranges, as start (inclusive) and end (exclusive) indices. 
+ """ + # End of each range can be determined now since we have the total size + processed_ranges = [ + (start, end if end is not None else layer_size) for start, end in given_ranges + ] + + # No need to merge if there's only one or no ranges + if len(processed_ranges) <= 1: + return processed_ranges + + sorted_ranges = sorted(processed_ranges) + + merged_ranges = [sorted_ranges[0]] + for start, end in sorted_ranges[1:]: + prev_start, prev_end = merged_ranges[-1] + if start <= prev_end: + merged_ranges[-1] = (prev_start, max(prev_end, end)) + else: + merged_ranges.append((start, end)) + + return merged_ranges + + +def _create_freeze_parameters_hook(ranges_to_freeze: List[Tuple[int, int]]) -> Callable: + """ + Create a hook to freeze parameters in specified ranges by setting their gradients to zero. + + This function takes a list of tuples representing the ranges of indices to freeze. Each tuple should contain + two integers representing the start and end indices of the range. + + Parameters: + - ranges_to_freeze (List[Tuple[int, int]]): Ranges of indices to freeze. + + Returns: + - Callable: A hook function to be used with `register_hook` on parameters. + + Example usage: + ``` + ranges_to_freeze = [(0, 10), (20, 30)] + hook = _create_freeze_parameters_hook(ranges_to_freeze) + model.register_hook(hook) + ``` + """ + + def freeze_parameters_hook(gradients): + for start, end in ranges_to_freeze: + gradients[start:end].zero_() + + return freeze_parameters_hook + + +class LayerNamePattern: + """ + Represents a regex pattern for layer names, potentially including a parameter index range. + """ + + def __init__(self, pattern: str): + """ + Initializes a new instance of the LayerNamePattern class. + + Parameters: + - pattern (str): The regex pattern for layer names, potentially including a parameter index range. + """ + self.raw_pattern = pattern + name_pattern, self.range = self._parse_pattern(pattern) + self.name_regex = re.compile(name_pattern.replace(".", "\\.")) + + def match(self, name: str) -> bool: + """ + Checks if the given layer name matches the regex pattern. + + Parameters: + - name (str): The layer name to check. + + Returns: + - bool: True if the layer name matches the pattern, False otherwise. + """ + return self.name_regex.match(name) is not None + + def _parse_pattern( + self, pattern: str + ) -> Tuple[str, Union[Tuple[int, Union[int, None]], None]]: + """ + Extracts the range pattern from the given pattern. + + Parameters: + - pattern (str): The pattern to extract the range from. + + Returns: + - Tuple[str, Tuple[int, int | None] | None]: A tuple containing the regex pattern to match the layer name without the range pattern and the range of layer indices to match, if specified. + """ + match = re.match(r"^(.+)\[([0-9]*)(?::([0-9]*))?\]$", pattern) + if not match: + return pattern, None + + base_pattern, start_part, end_part = match.groups() + + if end_part is None and start_part.isdecimal(): + index = int(start_part) + return base_pattern, (index, index + 1) + + # [:end] or [start:] or [start:end] + start = int(start_part) if start_part else 0 + end = int(end_part) if end_part else None + + if end is not None and start >= end: + raise ValueError( + f"Invalid range in layer name pattern: {pattern}." + "End of range must be greater than start." 
+ ) + return base_pattern, (start, end) diff --git a/src/axolotl/utils/lora_embeddings.py b/src/axolotl/utils/lora_embeddings.py new file mode 100644 index 0000000000000000000000000000000000000000..70f56655ea14cddcecc2dd0d9f781077243ccabe --- /dev/null +++ b/src/axolotl/utils/lora_embeddings.py @@ -0,0 +1,14 @@ +""" +helpers for lora embeddings +""" + + +def get_linear_embedding_layers(model_type): + """ + returns the linear embedding layers needed for loras, dependent on the model arch + """ + if model_type == "gpt_neox": + return ["embed_in", "embed_out"] + if model_type == "falcon": + return ["word_embeddings", "lm_head"] + return ["embed_tokens", "lm_head"] diff --git a/src/axolotl/utils/mlflow_.py b/src/axolotl/utils/mlflow_.py new file mode 100644 index 0000000000000000000000000000000000000000..ce77390342788a9df9ccd84e4f52bd71abc949f0 --- /dev/null +++ b/src/axolotl/utils/mlflow_.py @@ -0,0 +1,18 @@ +"""Module for mlflow utilities""" + +import os + +from axolotl.utils.dict import DictDefault + + +def setup_mlflow_env_vars(cfg: DictDefault): + for key in cfg.keys(): + if key.startswith("mlflow_") or key.startswith("hf_mlflow_"): + value = cfg.get(key, "") + + if value and isinstance(value, str) and len(value) > 0: + os.environ[key.upper()] = value + + # Enable mlflow if experiment name is present + if cfg.mlflow_experiment_name and len(cfg.mlflow_experiment_name) > 0: + cfg.use_mlflow = True diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py new file mode 100644 index 0000000000000000000000000000000000000000..41fd471e65a61bb52cda302e186b6c986d0b0bc5 --- /dev/null +++ b/src/axolotl/utils/models.py @@ -0,0 +1,896 @@ +"""Module for models and model loading""" +# pylint: disable=too-many-lines + +import logging +import math +import os +import types +from typing import Any, Dict, Optional, Tuple, Union # noqa: F401 + +import addict +import bitsandbytes as bnb +import torch +import transformers +from accelerate import init_empty_weights +from bitsandbytes.nn import Params4bit +from peft import ( + LoftQConfig, + PeftConfig, + PeftModel, + PeftModelForCausalLM, + prepare_model_for_kbit_training, +) +from peft.tuners.lora import QuantLinear +from torch import nn +from transformers import ( # noqa: F401 + AddedToken, + AutoConfig, + AutoModelForCausalLM, + AutoTokenizer, + BitsAndBytesConfig, + GPTQConfig, + PreTrainedModel, + PreTrainedTokenizerBase, +) +from transformers.integrations.deepspeed import is_deepspeed_zero3_enabled + +from axolotl.models.mamba import fix_mamba_attn_for_loss +from axolotl.monkeypatch.multipack import ( + SUPPORTED_MULTIPACK_MODEL_TYPES, + patch_for_multipack, +) +from axolotl.prompt_tokenizers import LLAMA_DEFAULT_EOS_TOKEN +from axolotl.utils.bench import log_gpu_memory_usage +from axolotl.utils.chat_templates import chat_templates +from axolotl.utils.dict import DictDefault +from axolotl.utils.lora_embeddings import get_linear_embedding_layers + +LOG = logging.getLogger("axolotl") + + +def check_model_config(cfg: DictDefault, model_config: Union[AutoConfig, DictDefault]): + quant_config_exists = ( + hasattr(model_config, "quantization_config") + and model_config.quantization_config + ) + quant_config_method_is_gptq = ( + quant_config_exists + and "quant_method" in model_config.quantization_config + and model_config.quantization_config["quant_method"] == "gptq" + ) + + if cfg.gptq and not quant_config_method_is_gptq: + raise ValueError( + "model_config.quantization_config is not set or quant_method is not set to gptq. 
" + "Please make sure to point to a GPTQ model." + ) + + if not cfg.gptq and quant_config_exists: + raise ValueError( + "model_config.quantization_config is set but `gptq` flag is not. " + "Please use the `gptq` flag to train quantized model or point to a non-quantized model." + ) + + lora_modules_to_save = get_linear_embedding_layers(model_config.model_type) + if ( + cfg.adapter + and cfg.tokens + and ( + not cfg.lora_modules_to_save + or not all(x in cfg.lora_modules_to_save for x in lora_modules_to_save) + ) + ): + lora_modules_to_save = ", ".join(map(lambda x: f"`{x}`", lora_modules_to_save)) + raise ValueError( + f"`lora_modules_to_save` not properly set when adding new tokens. Please include [{lora_modules_to_save}] in `lora_modules_to_save`." + ) + + +def load_model_config(cfg): + model_config_name = cfg.base_model_config or cfg.base_model + if not model_config_name and cfg.tokenizer_config: + model_config_name = cfg.tokenizer_config + trust_remote_code = cfg.trust_remote_code is True + config_kwargs = {} + if cfg.revision_of_model: + config_kwargs["revision"] = cfg.revision_of_model + + try: + model_config = AutoConfig.from_pretrained( + model_config_name, + trust_remote_code=trust_remote_code, + **config_kwargs, + ) + except ValueError as err: + if "mamba" in model_config_name: + return addict.Dict( + { + "model_type": "mamba", + } + ) + raise err + + if cfg.overrides_of_model_config: + for key, val in cfg.overrides_of_model_config.items(): + setattr(model_config, key, val) + + check_model_config(cfg, model_config) + + return model_config + + +def load_tokenizer(cfg): + model_config = load_model_config(cfg) + tokenizer_kwargs = {} + use_fast = True # this is the default + + if cfg.tokenizer_use_fast is not None: + use_fast = cfg.tokenizer_use_fast + if cfg.tokenizer_legacy is not None: + # True is the default w/ https://github.com/huggingface/transformers/pull/25224 + tokenizer_kwargs["legacy"] = cfg.tokenizer_legacy + + tokenizer_cls = AutoTokenizer + if cfg.tokenizer_type: + tokenizer_cls = getattr(transformers, cfg.tokenizer_type) + + tokenizer_config = cfg.tokenizer_config or cfg.base_model_config or cfg.base_model + tokenizer = tokenizer_cls.from_pretrained( + tokenizer_config, + trust_remote_code=cfg.trust_remote_code or False, + use_fast=use_fast, + **tokenizer_kwargs, + ) + + if ( + tokenizer.__class__.__name__ + in [ + "LlamaTokenizer", + "LlamaTokenizerFast", + "CodeLlamaTokenizer", + "CodeLlamaTokenizerFast", + ] + and hasattr(tokenizer, "pad_token") + and not tokenizer.pad_token + ): + # set a pad_token, but use eos_token so we don't add a new token + tokenizer.pad_token = LLAMA_DEFAULT_EOS_TOKEN + + if tokenizer.__class__.__name__ == "GPTNeoXTokenizerFast": + tokenizer.add_special_tokens({"pad_token": "[PAD]"}) + os.environ["TOKENIZERS_PARALLELISM"] = "false" + + # Mistral's official FA implementation requires left padding + if cfg.is_mistral_derived_model and cfg.flash_attention and not cfg.sample_packing: + tokenizer.padding_side = "left" + + # Qwen base only has single token, so we need to set the special tokens + if cfg.is_qwen_derived_model: + token_ids = ["bos_token_id", "eos_token_id", "pad_token_id", "unk_token_id"] + for attr_name in token_ids: + if getattr(tokenizer, attr_name) is None: + setattr(tokenizer, attr_name, tokenizer.eod_id) + + token_names = ["bos_token", "eos_token", "pad_token", "unk_token"] + for attr_name in token_names: + if getattr(tokenizer, attr_name) is None: + setattr(tokenizer, attr_name, "<|endoftext|>") + + additional_special_tokens 
= None + if cfg.special_tokens: + special_tokens = cfg.special_tokens.to_dict() + additional_special_tokens = special_tokens.pop( + "additional_special_tokens", None + ) + lora_modules_to_save = get_linear_embedding_layers(model_config.model_type) + for k, val in special_tokens.items(): + # check if new special token is not already in tokenizer and + # is adapter training to make sure lora_modules_to_save is set + # pylint: disable=too-many-boolean-expressions + if ( + (getattr(tokenizer, k) is None or getattr(tokenizer, k) != val) + and (len(tokenizer.encode(val, add_special_tokens=False)) > 2) + and cfg.adapter + and ( + not cfg.lora_modules_to_save + or not all( + x in cfg.lora_modules_to_save for x in lora_modules_to_save + ) + ) + ): + lora_modules_to_save = ", ".join( + [f"`{x}`" for x in lora_modules_to_save] + ) + raise ValueError( + f"Please set lora_modules_to_save to [{lora_modules_to_save}] when using an adapter and changing the special tokens." + ) + + tokenizer.add_special_tokens( + {k: AddedToken(val, rstrip=False, lstrip=False, normalized=False)} + ) + + # If we add bos_token and eos_token, we need to update the post processor to + # handle them correctly. + # https://github.com/huggingface/transformers/pull/24132 + bos_or_eos_in_special_tokens = ( + "bos_token" in cfg.special_tokens and "eos_token" in cfg.special_tokens + ) + if ( + tokenizer.__class__.__name__ + in ( + "LlamaTokenizerFast", + "CodeLlamaTokenizerFast", + ) + and bos_or_eos_in_special_tokens + ): + tokenizer.update_post_processor() + + if cfg.tokens: + tokenizer.add_tokens( + [ + AddedToken(token, rstrip=False, lstrip=False, normalized=False) + for token in cfg.tokens + ] + ) + + # Additional special tokens are a List, and need to be treated differently than regular special + # tokens. We add them after we have called `add_tokens` in case these additional special tokens + # are new tokens. + # + # Usage: + # + # ```py + # special_tokens: + # additional_special_tokens: ["<|im_start|>", "<|im_end|>"] + # ``` + if additional_special_tokens is not None: + tokenizer.add_special_tokens( + {"additional_special_tokens": additional_special_tokens} + ) + + LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}") + LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}") + LOG.debug(f"PAD: {tokenizer.pad_token_id} / {tokenizer.pad_token}") + LOG.debug(f"UNK: {tokenizer.unk_token_id} / {tokenizer.unk_token}") + + if cfg.chat_template: + chat_template_string = chat_templates(cfg.chat_template) + if cfg.default_system_message and cfg.chat_template == "chatml": + chat_template_string = chat_template_string.replace( + "You are a helpful assistant.", cfg.default_system_message + ) + + tokenizer.chat_template = chat_template_string + else: + LOG.info( + "No Chat template selected. Consider adding a chat template for easier inference." + ) + return tokenizer + + +def load_model( + cfg: DictDefault, + tokenizer: PreTrainedTokenizerBase, + inference: bool = False, + reference_model: bool = False, +) -> Tuple[PreTrainedModel, Optional[PeftConfig]]: + """ + Load a model for a given configuration and tokenizer. 
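+
+    Returns a tuple of ``(model, lora_config)``; ``lora_config`` is ``None`` when
+    no PEFT adapter is configured.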
+ """ + base_model = cfg.base_model + model_type = cfg.type_of_model + model_config = load_model_config(cfg) + + # TODO refactor as a kwarg + load_in_8bit = cfg.load_in_8bit + + if hasattr(model_config, "model_type") and model_config.model_type == "btlm": + if cfg.flash_attention: + from axolotl.monkeypatch.btlm_attn_hijack_flash import ( + replace_btlm_attn_with_flash_attn, + ) + + replace_btlm_attn_with_flash_attn(cfg.base_model) + + if ( + hasattr(model_config, "model_type") + and model_config.model_type == "stablelm_epoch" + ): + if cfg.flash_attention and cfg.sample_packing: + from axolotl.monkeypatch.stablelm_attn_hijack_flash import ( + replace_stablelm_attn_with_flash_attn, + ) + + replace_stablelm_attn_with_flash_attn(cfg.base_model) + + if cfg.sample_packing and cfg.s2_attention: + raise ValueError( + "Received `sample_packing=true` and `s2_attention=true`; however, \ + shifted-sparse attention does not currently support sample packing." + ) + + if ( + cfg.model_config_type in SUPPORTED_MULTIPACK_MODEL_TYPES + and cfg.flash_attention + and cfg.sample_packing + ): + patch_for_multipack(cfg.model_config_type, model_name=cfg.base_model) + elif cfg.is_llama_derived_model: + # Modify all llama derived models in one block + + if cfg.flash_attention: + from axolotl.monkeypatch.llama_attn_hijack_flash import ( + replace_llama_attn_with_flash_attn, + ) + + if cfg.sample_packing: + if cfg.device not in ["mps", "cpu"] and not inference: + LOG.info("patching with flash attention for sample packing") + replace_llama_attn_with_flash_attn( + packed=True, + cross_entropy=cfg.flash_attn_cross_entropy, + rms_norm=cfg.flash_attn_rms_norm, + ) + elif cfg.s2_attention: + LOG.info("patching w/ flash-enabled, shifted-sparse attention") + replace_llama_attn_with_flash_attn( + packed=False, + cross_entropy=cfg.flash_attn_cross_entropy, + rms_norm=cfg.flash_attn_rms_norm, + use_shifted_sparse_attn=True, + ) + elif cfg.xformers_attention: + from axolotl.monkeypatch.llama_attn_hijack_xformers import ( + hijack_llama_attention, + ) + + LOG.info("patching with xformers attention") + hijack_llama_attention() + elif cfg.sample_packing: + from axolotl.monkeypatch.llama_patch_multipack import ( + hijack_llama_prepare_4d_mask, + ) + + LOG.info("patching llama _prepare_4d_causal_attention_mask*") + hijack_llama_prepare_4d_mask() + elif cfg.s2_attention: + raise NotImplementedError( + "Shifted-sparse attention not currently implemented without flash attention." 
+ ) + + # Modify mistral derived models + if ( + cfg.model_config_type == "mistral" + and cfg.flash_attention + and cfg.sample_packing + ): + from axolotl.monkeypatch.mistral_attn_hijack_flash import ( + replace_mistral_attn_with_flash_attn, + ) + + LOG.info("patching mistral with flash attention") + replace_mistral_attn_with_flash_attn(packed=cfg.sample_packing) + + if cfg.is_llama_derived_model and cfg.sample_packing and not inference: + from axolotl.monkeypatch.llama_expand_mask import hijack_expand_mask + + LOG.info("patching _expand_mask") + hijack_expand_mask() + + model_kwargs: Dict[str, Any] = {} + + if cfg.model_kwargs: + for key, val in cfg.model_kwargs.items(): + model_kwargs[key] = val + + max_memory = cfg.max_memory + device_map = cfg.device_map + + if cfg.gpu_memory_limit: + gpu_memory_limit = ( + str(cfg.gpu_memory_limit) + "GiB" + if isinstance(cfg.gpu_memory_limit, int) + else cfg.gpu_memory_limit + ) + + max_memory = {} + for i in range(torch.cuda.device_count()): + max_memory[i] = gpu_memory_limit + max_memory["cpu"] = "256GiB" # something sufficiently large to fit anything + + if max_memory is not None: + # Based on https://github.com/togethercomputer/OpenChatKit/blob/main/inference/bot.py + from accelerate import infer_auto_device_map + + with init_empty_weights(): + model_canvas = AutoModelForCausalLM.from_config(model_config) + model_canvas.tie_weights() + device_map = infer_auto_device_map( + model_canvas, + max_memory=max_memory, + dtype=cfg.torch_dtype, + ) + # We can discard max_memory now as we have a device map set up for us + max_memory = None + + model_kwargs["device_map"] = device_map + model_kwargs["torch_dtype"] = cfg.torch_dtype + + if torch.backends.mps.is_available(): + model_kwargs["device_map"] = "mps:0" + + # TODO can we put the reference model on it's own gpu? 
I think we have to move logits around to calculate loss + # if cfg.rl: + # if torch.cuda.device_count() > 1: + # if reference_model: + # model_kwargs["device_map"] = "cuda:" + str( + # torch.cuda.current_device() + 1 + # ) + # else: + # model_kwargs["device_map"] = "cuda:" + str(torch.cuda.current_device()) + + if is_deepspeed_zero3_enabled(): + del model_kwargs["device_map"] + + if cfg.revision_of_model: + model_kwargs["revision"] = cfg.revision_of_model + if cfg.gptq: + if not hasattr(model_config, "quantization_config"): + LOG.warning("model config does not contain quantization_config information") + else: + if cfg.gptq_disable_exllama is not None: + model_config.quantization_config[ + "disable_exllama" + ] = cfg.gptq_disable_exllama + model_kwargs["quantization_config"] = GPTQConfig( + **model_config.quantization_config + ) + if cfg.adapter == "qlora" and cfg.load_in_4bit: + bnb_config = { + "load_in_4bit": True, + "llm_int8_threshold": 6.0, + "llm_int8_has_fp16_weight": False, + "bnb_4bit_compute_dtype": cfg.torch_dtype, + "bnb_4bit_use_double_quant": True, + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_quant_storage": torch.bfloat16, + } + + if cfg.bnb_config_kwargs: + bnb_config.update(cfg.bnb_config_kwargs) + + model_kwargs["quantization_config"] = BitsAndBytesConfig( + **bnb_config, + ) + + if cfg.load_in_8bit and cfg.adapter is not None: + model_kwargs["load_in_8bit"] = True + if cfg.load_in_4bit and cfg.adapter is not None: + model_kwargs["load_in_4bit"] = True + + # no longer needed per https://github.com/huggingface/transformers/pull/26610 + if "quantization_config" in model_kwargs or cfg.gptq: + if "load_in_8bit" in model_kwargs: + del model_kwargs["load_in_8bit"] + if "load_in_4bit" in model_kwargs: + del model_kwargs["load_in_4bit"] + + # sample packing uses custom FA2 patch + if cfg.flash_attention: + if not cfg.sample_packing: + if cfg.s2_attention: + pass + # most other models support flash attention, we can define exceptions as they come up + model_kwargs["attn_implementation"] = "flash_attention_2" + model_config._attn_implementation = ( # pylint: disable=protected-access + "flash_attention_2" + ) + else: + if model_config.model_type in SUPPORTED_MULTIPACK_MODEL_TYPES: + model_kwargs["attn_implementation"] = "flash_attention_2" + model_config._attn_implementation = ( # pylint: disable=protected-access + "flash_attention_2" + ) + else: + model_kwargs["attn_implementation"] = "eager" + model_config._attn_implementation = ( # pylint: disable=protected-access + "eager" + ) + elif cfg.sdp_attention: + model_kwargs["attn_implementation"] = "sdpa" + model_config._attn_implementation = "sdpa" # pylint: disable=protected-access + elif cfg.eager_attention: + model_kwargs["attn_implementation"] = "eager" + model_config._attn_implementation = "eager" # pylint: disable=protected-access + + qlora_fsdp = cfg.fsdp and cfg.adapter == "qlora" + + try: + if ( + model_config.model_type == "llama" + and not cfg.trust_remote_code + and not cfg.gptq + ): + from transformers import LlamaForCausalLM + + model = LlamaForCausalLM.from_pretrained( + base_model, + config=model_config, + **model_kwargs, + ) + + if cfg.flash_attention and not inference: + from axolotl.monkeypatch.llama_attn_hijack_flash import ( + is_xformers_swiglu_available, + replace_llama_mlp_with_swiglu, + replace_llama_qkv_with_fused, + ) + + if cfg.flash_attn_fuse_mlp and is_xformers_swiglu_available(): + LOG.info("patching with SwiGLU") + replace_llama_mlp_with_swiglu(model) + + if cfg.flash_attn_fuse_qkv: + LOG.info("patching 
with fused QKV") + replace_llama_qkv_with_fused(model) + elif model_type == "MambaLMHeadModel": + # FIXME this is janky at best and hacked together to make it work + MambaLMHeadModel = fix_mamba_attn_for_loss() # pylint: disable=invalid-name + + model_kwargs["dtype"] = model_kwargs["torch_dtype"] + model_kwargs["device"] = torch.cuda.current_device() + del model_kwargs["torch_dtype"] + del model_kwargs["device_map"] + + model = MambaLMHeadModel.from_pretrained( + base_model, + **model_kwargs, + ) + elif model_type and not cfg.trust_remote_code: + if cfg.gptq: + model = AutoModelForCausalLM.from_pretrained( + base_model, + config=model_config, + trust_remote_code=cfg.trust_remote_code or False, + **model_kwargs, + ) + else: + model = getattr(transformers, model_type).from_pretrained( + base_model, + config=model_config, + trust_remote_code=cfg.trust_remote_code or False, + **model_kwargs, + ) + else: + # Shouldn't be a problem most of the time. will obviously error if the model doesn't support this + # when training starts + if ( + hasattr(model_config, "max_seq_len") + and model_config.max_seq_len + and cfg.sequence_len > model_config.max_seq_len + ): + model_config.max_seq_len = cfg.sequence_len + LOG.warning(f"increasing context length to {cfg.sequence_len}") + elif ( + hasattr(model_config, "max_sequence_length") + and model_config.max_sequence_length + and cfg.sequence_len > model_config.max_sequence_length + ): + model_config.max_sequence_length = cfg.sequence_len + LOG.warning(f"increasing context length to {cfg.sequence_len}") + if cfg.gptq: + model = AutoModelForCausalLM.from_pretrained( + base_model, + config=model_config, + trust_remote_code=cfg.trust_remote_code or False, + **model_kwargs, + ) + else: + model = AutoModelForCausalLM.from_pretrained( + base_model, + config=model_config, + trust_remote_code=cfg.trust_remote_code or False, + **model_kwargs, + ) + except Exception as err: # pylint: disable=broad-exception-caught + LOG.exception(err) + raise err + + if isinstance(model, (PeftModel, PeftModelForCausalLM)) and not qlora_fsdp: + model = model.merge_and_unload() + + embeddings_len = ( + math.ceil(len(tokenizer) / 32) * 32 + if cfg.resize_token_embeddings_to_32x + else len(tokenizer) + ) + if ( + hasattr(model, "get_input_embeddings") + and model.get_input_embeddings().num_embeddings < embeddings_len + ): + model.resize_token_embeddings(embeddings_len) + else: + model.tie_weights() + + if ( + hasattr(model, "config") + and hasattr(model.config, "max_position_embeddings") + and model.config.max_position_embeddings + and cfg.sequence_len > model.config.max_position_embeddings + ): + LOG.warning( + f"increasing model.config.max_position_embeddings from {model.config.max_position_embeddings} to {cfg.sequence_len}" + ) + model.config.max_position_embeddings = cfg.sequence_len + + if ( + hasattr(model, "config") + and hasattr(model.config, "bos_token_id") + and model.config.bos_token_id + and model.config.bos_token_id != tokenizer.bos_token_id + ): + model.config.bos_token_id = tokenizer.bos_token_id + + if ( + hasattr(model, "config") + and hasattr(model.config, "eos_token_id") + and model.config.eos_token_id + and model.config.eos_token_id != tokenizer.eos_token_id + ): + model.config.eos_token_id = tokenizer.eos_token_id + + if hasattr(model, "device") and model.device.type in ("cuda", "mps"): + log_gpu_memory_usage(LOG, "after model load", model.device) + + # make sure these are fp32 per Ramesh et al. 
(2021) + embedding_modules = get_linear_embedding_layers(cfg.model_config_type) + if not cfg.fsdp: + # FSDP doesn't like mixed Float and BFloat16 + for name, module in model.named_modules(): + if "norm" in name or name.endswith(".gate"): + module.to(torch.float32) + if model_config.model_type == "btlm": + # don't upcast lm_head for btlm + continue + if any(m in name for m in embedding_modules): + if hasattr(module, "weight"): + module.to(torch.float32) + + needs_fa2_dtype = cfg.adapter or cfg.fsdp + skip_prepare_model_for_kbit_training = False + + if cfg.model_config_type == "mixtral" and is_deepspeed_zero3_enabled(): + from deepspeed.utils import ( # pylint: disable=no-name-in-module + set_z3_leaf_modules, + ) + from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock + + set_z3_leaf_modules(model, [MixtralSparseMoeBlock]) + + if cfg.model_config_type == "qwen" and cfg.adapter == "lora": + # Qwen doesn't play nicely with LoRA if this is enabled + skip_prepare_model_for_kbit_training = True + + loftq_bits = cfg.peft and cfg.peft.loftq_config and cfg.peft.loftq_config.loftq_bits + if cfg.adapter == "lora" and loftq_bits: + skip_prepare_model_for_kbit_training = True + + if qlora_fsdp: + skip_prepare_model_for_kbit_training = True + + if cfg.adapter in ["lora", "qlora"]: + if cfg.gradient_checkpointing: + model.gradient_checkpointing_enable( + gradient_checkpointing_kwargs=cfg.gradient_checkpointing_kwargs + ) + if ( + cfg.load_in_8bit or cfg.load_in_4bit + ) and not skip_prepare_model_for_kbit_training: + LOG.info("converting PEFT model w/ prepare_model_for_kbit_training") + model = prepare_model_for_kbit_training( + model, use_gradient_checkpointing=cfg.gradient_checkpointing + ) + needs_fa2_dtype = True + + # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to + # convert them back to fp16/bf16 for flash-attn compatibility. 
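+    # Note: only modules with "norm" in their name and the embedding/lm_head modules
+    # are cast back to cfg.torch_dtype here; everything else keeps its load-time dtype,
+    # and the cast is skipped entirely for QLoRA + FSDP.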
+ if (needs_fa2_dtype or cfg.flash_attention) and not qlora_fsdp: + LOG.info("converting modules to %s for flash attention", cfg.torch_dtype) + for name, module in model.named_modules(): + if "norm" in name: + module.to(cfg.torch_dtype) + if any(m in name for m in embedding_modules): + if hasattr(module, "weight"): + module.to(cfg.torch_dtype) + + lora_config = None + if not reference_model or cfg.lora_model_dir: + # if we're not loading the reference model, then we're loading the model for training + # then the dpo trainer doesn't want the peft model loaded over it, it just wants the lora/peft config + if cfg.adapter and cfg.rl in ["dpo", "ipo", "kto_pair"] and not cfg.merge_lora: + _, lora_config = load_lora(model, cfg, inference=False, config_only=True) + else: + model, lora_config = load_adapter(model, cfg, cfg.adapter) + + if ( + cfg.ddp + and not load_in_8bit + and not (cfg.rl and cfg.load_in_4bit) + and not qlora_fsdp + ): + # TODO revaldate this conditional + model.to(f"cuda:{cfg.local_rank}") + + if torch.cuda.device_count() > 1 and int(os.getenv("WORLD_SIZE", "1")) == 1: + setattr(model, "is_parallelizable", True) + setattr(model, "model_parallel", True) + + requires_grad = [] + for name, param in model.named_parameters(recurse=True): + if param.requires_grad: + requires_grad.append(f"{name}: {param.requires_grad}") + if len(requires_grad) == 0: + LOG.warning("there are no parameters that require gradient updates") + if hasattr(model, "config"): + model.config.use_cache = False + + if cfg.flash_optimum: + from optimum.bettertransformer import BetterTransformer + + model = BetterTransformer.transform(model) + + if cfg.adapter is not None: + log_gpu_memory_usage(LOG, "after adapters", model.device) + + # TODO resume_from_checkpoint handling + return model, lora_config + + +def load_adapter(model, cfg, adapter, inference=False): + # type: (PreTrainedModel, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]] + + if adapter is None: + return model, None + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + if adapter in ["lora", "qlora"]: + return load_lora(model, cfg, inference=inference) + if adapter == "llama-adapter": + return load_llama_adapter(model, cfg) + + raise NotImplementedError(f"{adapter} peft adapter not available") + + +def load_llama_adapter(model, cfg): + # type: (PreTrainedModel, DictDefault) -> Tuple[PreTrainedModel, Optional[PeftConfig]] + from peft import AdaptionPromptConfig, get_peft_model + + peft_config = AdaptionPromptConfig( + adapter_layers=cfg.peft_adapter.layers, # layers (L) + adapter_len=cfg.peft_adapter.len, # prompt length (K) + task_type="CAUSAL_LM", + ) + + if cfg.lora_model_dir: + LOG.debug("Loading pretrained PEFT - llama_adapter") + model = PeftModel.from_pretrained( + model, + cfg.lora_model_dir, + torch_dtype=torch.float16, + ) + else: + model = get_peft_model(model, peft_config) + + model.print_trainable_parameters() + + return model, peft_config + + +def find_all_linear_names(model): + cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear) + lora_module_names = set() + for name, module in model.named_modules(): + if ( + isinstance(module, cls) + or "Linear" in module.__class__.__name__ + and module.__class__.__name__ not in ("LlamaLinearScalingRotaryEmbedding",) + ): + names = name.split(".") + lora_module_names.add(names[0] if len(names) == 1 else names[-1]) + + embedding_modules = get_linear_embedding_layers(model.config.model_type) + output_embedding = 
embedding_modules[1] + if output_embedding in lora_module_names: # needed for 16-bit + lora_module_names.remove(output_embedding) + + return list(lora_module_names) + + +def setup_quantized_meta_for_peft(model: nn.Module): + """Replaces `quant_state.to` with a dummy function to prevent PEFT from moving `quant_state` to meta device""" + + def temp_to_method(self, *args, **kwargs): # pylint: disable=unused-argument + return self + + for param in model.parameters(): + if isinstance(param, Params4bit): + param.quant_state._orig_to = ( # pylint: disable=protected-access + param.quant_state.to + ) + param.quant_state.to = types.MethodType(temp_to_method, param.quant_state) + + +def setup_quantized_peft_meta_for_training(model: nn.Module): + """Replaces dummy `quant_state.to` method with the original function to allow training to continue""" + for param in model.parameters(): + if isinstance(param, Params4bit) and hasattr(param.quant_state, "_orig_to"): + param.quant_state.to = ( + param.quant_state._orig_to # pylint: disable=protected-access + ) + param.quant_state._orig_to = None # pylint: disable=protected-access + + +def load_lora(model, cfg, inference=False, config_only=False): + # type: (PreTrainedModel, DictDefault, bool, bool) -> Tuple[Optional[PreTrainedModel], Optional[PeftConfig]] + + from peft import LoraConfig, get_peft_model + + lora_target_modules = list(cfg.lora_target_modules or []) + + if cfg.lora_target_linear: + linear_names = find_all_linear_names(model) + LOG.info(f"found linear modules: {repr(linear_names)}") + lora_target_modules = list(set(lora_target_modules + linear_names)) + + lora_config_kwargs = {} + loftq_bits = cfg.peft and cfg.peft.loftq_config and cfg.peft.loftq_config.loftq_bits + if loftq_bits: + lora_config_kwargs["loftq_config"] = LoftQConfig(loftq_bits=loftq_bits) + lora_config_kwargs["init_lora_weights"] = "loftq" + if cfg.peft_use_dora: + lora_config_kwargs["use_dora"] = cfg.peft_use_dora + if cfg.peft_use_rslora: + lora_config_kwargs["use_rslora"] = cfg.use_rslora + + lora_config = LoraConfig( + r=cfg.lora_r, + lora_alpha=cfg.lora_alpha, + target_modules=lora_target_modules, + layers_to_transform=cfg.peft_layers_to_transform, + lora_dropout=cfg.lora_dropout, + fan_in_fan_out=cfg.lora_fan_in_fan_out, + modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None, + bias="none", + task_type="CAUSAL_LM", + **lora_config_kwargs, + ) + + if config_only: + return None, lora_config + + rank = int(os.environ.get("LOCAL_RANK", 0)) + + if cfg.fsdp and cfg.adapter == "qlora" and rank != 0: + setup_quantized_meta_for_peft(model) + + if cfg.lora_model_dir: + LOG.debug("Loading pretrained PEFT - LoRA") + model_kwargs: Any = {} + if cfg.lora_on_cpu: + model_kwargs["max_memory"] = {"cpu": "256GiB"} + model_kwargs["device_map"] = {"": "cpu"} + model = PeftModel.from_pretrained( + model, + cfg.lora_model_dir, + is_trainable=(not inference), + **model_kwargs, + ) + else: + model = get_peft_model(model, lora_config) + + if rank == 0: + model.print_trainable_parameters() + elif cfg.fsdp and cfg.adapter == "qlora": + setup_quantized_peft_meta_for_training(model) + + return model, lora_config diff --git a/src/axolotl/utils/samplers/__init__.py b/src/axolotl/utils/samplers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..96e00a5d26fbd3359f3d23192c396dea8d184c71 --- /dev/null +++ b/src/axolotl/utils/samplers/__init__.py @@ -0,0 +1,5 @@ +""" +axolotl samplers module +""" +from .multipack import MultipackBatchSampler # noqa: F401 
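+# re-export the packing sampler and dataset-length helper for convenient imports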
+from .utils import get_dataset_lengths # noqa: F401 diff --git a/src/axolotl/utils/samplers/multipack.py b/src/axolotl/utils/samplers/multipack.py new file mode 100644 index 0000000000000000000000000000000000000000..cf47d9639b76bb2677001b238b4198ed95c2ab11 --- /dev/null +++ b/src/axolotl/utils/samplers/multipack.py @@ -0,0 +1,202 @@ +# pylint: skip-file +""" +Multipack Batch Sampler +""" +import logging +import math +import os +from typing import Any, Iterable, List, Union + +import numba +import numpy as np +from torch.utils.data import BatchSampler, Sampler + +LOG = logging.getLogger("axolotl.utils.samplers.multipack") + + +@numba.njit +def ffd_check(a: np.ndarray, c: int, n: int): + # First-fit-decreasing bin packing + # Check if a[] could fit in n bins with capacity c + # https://en.wikipedia.org/wiki/First-fit-decreasing_bin_packing + + a = np.sort(a)[::-1] + bins = np.full((n,), c, dtype=a.dtype) + for size in a: + not_found = True + for idx in range(n): + if bins[idx] >= size: + bins[idx] -= size + not_found = False + break + + if not_found: + return False + + return True + + +@numba.njit +def ffd_with_result(a: np.ndarray, c: int, start_index: int): + # First-fit-decreasing bin packing (with result return) + + indices = np.argsort(a)[::-1] + a = a[indices] + + bins: List[Any] = [] + bins_result: List[Any] = [] + for a_id, size in enumerate(a): + add_new = True + for idx in range(len(bins)): + if bins[idx] >= size: + bins[idx] -= size + bins_result[idx].append(indices[a_id] + start_index) + add_new = False + break + + if add_new: + bins.append(c - size) + bins_result.append([indices[a_id] + start_index]) + + return bins_result + + +@numba.njit +def allocate( + lengths: np.ndarray, lengths_cumsum: np.ndarray, rank: int, c: int, n: int +): + # Dynamic batch allocator, similar to Multifit + # https://en.wikipedia.org/wiki/Multifit_algorithm + # ~99.5% efficiency on OpenChat training set (12 * 2048 ctx len) + + s = 0 + start_index = 0 + result = [] + + while True: + # binary search [l, r) + left = 1 + right = 1 + np.searchsorted(lengths_cumsum[start_index:], s + c * n, "right") + + while right - left > 1: + mid = (left + right) // 2 + if ffd_check(lengths[start_index : start_index + mid], c, n): + left = mid + else: + right = mid + + # use length l + batch = ffd_with_result( + lengths[start_index : start_index + left], c, start_index + ) + assert len(batch) <= n + if len(batch) < n: + break + + start_index += left + s = lengths_cumsum[start_index - 1] + + # add local rank + result.append(batch[rank]) + + return result, s, len(result) * c * n + + +class MultipackBatchSampler(BatchSampler): + """ + Batch Sampler class for multipack + """ + + def __init__( + self, + sampler: Union[Sampler[int], Iterable[int]], + batch_size: int, + drop_last: bool, + batch_max_len: int, + lengths: np.ndarray, + packing_efficiency_estimate: float = 1.0, + ): + super().__init__(sampler, batch_size, drop_last) + self.batch_size = batch_size + self.batch_max_len = batch_max_len + self.lengths: np.ndarray = lengths + self.packing_efficiency_estimate = packing_efficiency_estimate or 1.0 + + assert isinstance(self.lengths, np.ndarray) + + self.epoch = 0 + + # statistics + self.eff_total_used = 0 + self.eff_total_slots = 0 + + def set_epoch(self, epoch: int): + self.epoch = epoch + + def generate_batches(self, set_stats=False): + indices = [idx for idx in self.sampler] + + lengths = self.lengths[indices] + lengths_cumsum = np.cumsum(lengths) + + batches, total_used, total_slots = allocate( + lengths=lengths, + 
lengths_cumsum=lengths_cumsum, + rank=0, + c=self.batch_max_len, + n=1, + ) + + batches = [ + [ + [indices[b_idx] for b_idx in batch] + for batch in batches[i : i + self.batch_size] + ] + for i in range(0, len(batches), self.batch_size) + ] + + # statistics + if set_stats: + self.eff_total_used += total_used + self.eff_total_slots += total_slots + + return batches + + def __iter__(self): + batches = self.generate_batches(set_stats=True) + return iter(batches) + + def num_batches(self): + batches = self.generate_batches(set_stats=True) + return len(batches) + + def efficiency(self): + return self.eff_total_used / self.eff_total_slots + + def __len__(self): + self.num_batches() + return self._len_est() + + def _len_est(self): + world_size = int(os.getenv("WORLD_SIZE", "1")) + lengths_sum = np.sum(self.lengths) + lengths_sum_per_device = lengths_sum // world_size + LOG.info( + f"packing_efficiency_estimate: {self.packing_efficiency_estimate} " + f"total_num_tokens per device: {lengths_sum_per_device}" + ) + + # shave off 1% + 1 for dealing with variance in packing from random sampler to sampler + return max( + 0, + ( + world_size + * math.floor( + 0.99 + * lengths_sum_per_device + / self.packing_efficiency_estimate + // (self.batch_max_len * self.batch_size) + ) + - 1 + ), + ) diff --git a/src/axolotl/utils/samplers/utils.py b/src/axolotl/utils/samplers/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..e4af4e5f35f3cc25a8e3bb8fc908c3400cf60eff --- /dev/null +++ b/src/axolotl/utils/samplers/utils.py @@ -0,0 +1,17 @@ +""" +helper util to calculate dataset lengths +""" +import numpy as np + + +def get_dataset_lengths(dataset): + if "length" in dataset.data.column_names: + lengths = np.array(dataset.data.column("length")) + elif "position_ids" in dataset.data.column_names: + position_ids = dataset.data.column("position_ids") + lengths = np.array([x[-1] + 1 for x in position_ids]) + else: + input_ids = dataset.data.column("input_ids") + lengths = np.vectorize(len)(np.array(input_ids, dtype=object)) + return lengths + return lengths diff --git a/src/axolotl/utils/schedulers.py b/src/axolotl/utils/schedulers.py new file mode 100644 index 0000000000000000000000000000000000000000..94387e5ab88af036e82db3066ef291aeaec01e11 --- /dev/null +++ b/src/axolotl/utils/schedulers.py @@ -0,0 +1,219 @@ +"""Module for custom LRScheduler class""" +import math +from functools import partial + +from torch.optim import Optimizer +from torch.optim.lr_scheduler import LambdaLR, LRScheduler + + +class InterpolatingLogScheduler(LRScheduler): + """ + A scheduler that interpolates learning rates in a logarithmic fashion + """ + + def __init__(self, optimizer, num_steps, min_lr, max_lr, last_epoch=-1): + """A scheduler that interpolates learning rates in a logarithmic fashion + + Args: + - optimizer: pytorch optimizer + - num_steps: int, the number of steps over which to increase from the min_lr to the max_lr + - min_lr: float, the minimum learning rate + - max_lr: float, the maximum learning rate + + Usage: + fc = nn.Linear(1,1) + optimizer = optim.Adam(fc.parameters()) + lr_scheduler = InterpolatingLogScheduler(optimizer, num_steps=400, min_lr=1e-6, max_lr=1e-4) + """ + self.num_steps = num_steps + self.min_lr = min_lr + self.max_lr = max_lr + self.q = (max_lr / min_lr) ** ( # pylint: disable=invalid-name + 1 / (num_steps - 1) + ) + super().__init__(optimizer, last_epoch) + + def get_lr(self): + if self.last_epoch <= 0: + lrs = [self.min_lr for base_lr in self.base_lrs] + elif self.last_epoch < 
self.num_steps: + lrs = [ + self.min_lr * (self.q ** (self.last_epoch - 1)) + for base_lr in self.base_lrs + ] + else: + lrs = [self.max_lr for base_lr in self.base_lrs] + + return lrs + + +def _get_cosine_schedule_with_quadratic_warmup_lr_lambda( + current_step: int, + *, + num_warmup_steps: int, + num_training_steps: int, + num_cycles: float, +): + if current_step < num_warmup_steps: + return (float(current_step) / float(max(1, num_warmup_steps))) ** 2 + progress = float(current_step - num_warmup_steps) / float( + max(1, num_training_steps - num_warmup_steps) + ) + return max( + 0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)) + ) + + +def get_cosine_schedule_with_quadratic_warmup( + optimizer: Optimizer, + num_warmup_steps: int, + num_training_steps: int, + num_cycles: float = 0.5, + last_epoch: int = -1, +): + """ + Create a schedule with a learning rate that decreases following the values of the cosine function between the + initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the + initial lr set in the optimizer. + + Args: + optimizer ([`~torch.optim.Optimizer`]): + The optimizer for which to schedule the learning rate. + num_warmup_steps (`int`): + The number of steps for the warmup phase. + num_training_steps (`int`): + The total number of training steps. + num_cycles (`float`, *optional*, defaults to 0.5): + The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0 + following a half-cosine). + last_epoch (`int`, *optional*, defaults to -1): + The index of the last epoch when resuming training. + + Return: + `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. + """ + + lr_lambda = partial( + _get_cosine_schedule_with_quadratic_warmup_lr_lambda, + num_warmup_steps=num_warmup_steps, + num_training_steps=num_training_steps, + num_cycles=num_cycles, + ) + return LambdaLR(optimizer, lr_lambda, last_epoch) + + +def _get_cosine_schedule_with_min_lr_lambda( + current_step: int, + *, + num_warmup_steps: int, + num_training_steps: int, + min_lr_ratio: float, +): + # Warm up + if current_step < num_warmup_steps: + return float(current_step) / float(max(1, num_warmup_steps)) + + # Cosine learning rate decay + progress = float(current_step - num_warmup_steps) / float( + max(1, num_training_steps - num_warmup_steps) + ) + scaling = 0.5 * (1.0 + math.cos(math.pi * progress)) + return (1 - min_lr_ratio) * scaling + min_lr_ratio + + +def get_cosine_schedule_with_min_lr( + optimizer: Optimizer, + num_warmup_steps: int, + num_training_steps: int, + min_lr_ratio: float = 0.0, +): + """ + Create a learning rate schedule which has: + - linear warmup from 0 -> `max_lr` over `num_warmup_steps` + - cosine learning rate annealing from `max_lr` -> `min_lr` over `num_training_steps` + """ + + lr_lambda = partial( + _get_cosine_schedule_with_min_lr_lambda, + num_warmup_steps=num_warmup_steps, + num_training_steps=num_training_steps, + min_lr_ratio=min_lr_ratio, + ) + return LambdaLR(optimizer, lr_lambda) + + +def _get_cosine_schedule_with_warmup_decay_constant_lr_lambda( + current_step: int, + *, + num_warmup_steps: int, + num_training_steps: int, + constant_lr_ratio: float, + min_lr_ratio: float, + num_cycles: float, +): + if current_step < num_warmup_steps: + return float(current_step) / float(max(1, num_warmup_steps)) + + num_constant_steps = int(num_training_steps * constant_lr_ratio) + current_step = min(current_step, num_constant_steps) + + progress = 
float(current_step - num_warmup_steps) / float(
+        max(1, num_constant_steps - num_warmup_steps)
+    )
+
+    return (
+        max(
+            0,
+            (1 - min_lr_ratio)
+            * 0.5
+            * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)),
+        )
+        + min_lr_ratio
+    )
+
+
+def get_cosine_schedule_with_warmup_decay_constant(
+    optimizer: Optimizer,
+    num_warmup_steps: int,
+    num_training_steps: int,
+    constant_lr_ratio: float,
+    min_lr_ratio: float,
+    num_cycles: float = 0.5,
+    last_epoch: int = -1,
+):
+    """
+    Implementation of "Continual Pre-Training of Large Language Models: How to (re)warm your model?"
+    (https://arxiv.org/pdf/2308.04014.pdf).
+    Create a schedule with a linear warmup from 0 to the initial lr set in the optimizer, followed by a cosine
+    decay from the initial lr down to min_lr_ratio of the initial lr over the first
+    num_training_steps * constant_lr_ratio steps, after which the learning rate is held constant at that minimum.
+
+    Args:
+        optimizer ([`~torch.optim.Optimizer`]):
+            The optimizer for which to schedule the learning rate.
+        num_warmup_steps (`int`):
+            The number of steps for the warmup phase.
+        num_training_steps (`int`):
+            The total number of training steps.
+        constant_lr_ratio (`float`):
+            The fraction of num_training_steps over which the cosine decay is applied.
+        min_lr_ratio (`float`):
+            The ratio of the maximum learning rate that the cosine function decays to and then holds.
+        num_cycles (`float`, *optional*, defaults to 0.5):
+            The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
+            following a half-cosine).
+        last_epoch (`int`, *optional*, defaults to -1):
+            The index of the last epoch when resuming training.
+
+    Return:
+        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
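+
+    Example (an illustrative sketch added for this docstring; the model, optimizer, and step counts are
+    arbitrary placeholders, not values used elsewhere in this changeset):
+
+        from torch import nn, optim
+
+        model = nn.Linear(1, 1)
+        optimizer = optim.AdamW(model.parameters(), lr=3e-4)
+        lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(
+            optimizer,
+            num_warmup_steps=100,
+            num_training_steps=1000,
+            constant_lr_ratio=0.8,
+            min_lr_ratio=0.1,
+        )
+        # lr warms up 0 -> 3e-4 over 100 steps, cosine-decays 3e-4 -> 3e-5 by step 800,
+        # then stays constant at 3e-5; call lr_scheduler.step() once per optimizer step.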
+ """ + + lr_lambda = partial( + _get_cosine_schedule_with_warmup_decay_constant_lr_lambda, + num_warmup_steps=num_warmup_steps, + num_training_steps=num_training_steps, + constant_lr_ratio=constant_lr_ratio, + min_lr_ratio=min_lr_ratio, + num_cycles=num_cycles, + ) + return LambdaLR(optimizer, lr_lambda, last_epoch) diff --git a/src/axolotl/utils/tokenization.py b/src/axolotl/utils/tokenization.py new file mode 100644 index 0000000000000000000000000000000000000000..afbdef87786bd296e73a2619186d8bb80539bd57 --- /dev/null +++ b/src/axolotl/utils/tokenization.py @@ -0,0 +1,102 @@ +"""Module for tokenization utilities""" + + +import logging +import re +from typing import Dict, List + +from termcolor import colored + +LOG = logging.getLogger("axolotl") + + +def check_dataset_labels(dataset, tokenizer, num_examples=5, text_only=False): + # the dataset is already shuffled, so let's just check the first 5 elements + for idx in range(num_examples): + check_example_labels(dataset[idx], tokenizer, text_only=text_only) + + +def check_example_labels(example, tokenizer, text_only=False): + # Get the input_ids, labels, and attention_mask from the dataset + input_ids = example["input_ids"] + labels = example["labels"] + + # You can compare the input_ids and labels element-wise + # Remember to ignore positions with IGNORE_TOKEN_ID (if you use it) or attention_mask equal to 0 + colored_tokens = [] + for _, (input_id, label_id) in enumerate(zip(input_ids, labels)): + decoded_input_token = tokenizer.decode(input_id) + # Choose the color based on whether the label has the ignore value or not + color = "red" if label_id == -100 else ("yellow" if label_id == 0 else "green") + colored_token = colored(decoded_input_token, color) + ( + not text_only and colored(f"({label_id}, {input_id})", "white") or "" + ) + colored_tokens.append(colored_token) + + delimiter = "" if text_only else " " + LOG.info(delimiter.join(colored_tokens)) + LOG.info("\n\n\n") + + return " ".join(colored_tokens) + + +GLAIVE_ROLES = ["USER", "ASSISTANT", "FUNCTION RESPONSE"] +GLAIVE_TO_SHAREGPT_ROLE = { + "SYSTEM": "system", + "USER": "human", + "ASSISTANT": "gpt", + "FUNCTION RESPONSE": "tool", +} + +GLAIVE_MSG_REGEX = re.compile(rf"({'|'.join(GLAIVE_ROLES)}): ") + + +def chatml_to_conversation(row: Dict[str, str]) -> List[Dict[str, str]]: + """ + Converts a ChatML formatted row to a list of messages in ShareGPT format. + Initially based off https://github.com/lilacai/lilac/blob/main/notebooks/GlaiveToShareGPT.ipynb. + """ + + system_prompt = row.get("system") + if system_prompt: + system_prompt = system_prompt.removeprefix("SYSTEM: ") + + chat_str = row["chat"] + chat_msgs = [s.strip() for s in GLAIVE_MSG_REGEX.split(chat_str) if s] + + chat_msg_dicts = [ + {"from": GLAIVE_TO_SHAREGPT_ROLE[role], "value": value} + for role, value in zip(chat_msgs[::2], chat_msgs[1::2]) + ] + + if system_prompt: + chat_msg_dicts = [ + {"from": GLAIVE_TO_SHAREGPT_ROLE["SYSTEM"], "value": system_prompt} + ] + chat_msg_dicts + + return chat_msg_dicts + + +def merge_consecutive_messages(messages): + """ + Merge consecutive messages from the same sender into a single message. + This can be useful with datasets that contain multiple consecutive tool calls. 
+ """ + + merged_messages = [] + current_from = None + current_message = "" + + for msg in messages: + if current_from == msg["from"]: + current_message += msg["value"] + else: + if current_from is not None: + merged_messages.append({"from": current_from, "value": current_message}) + current_from = msg["from"] + current_message = msg["value"] + + if current_from is not None: + merged_messages.append({"from": current_from, "value": current_message}) + + return merged_messages diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..380264a7ac3b6d554895f02dff21faeaa35487fe --- /dev/null +++ b/src/axolotl/utils/trainer.py @@ -0,0 +1,338 @@ +"""Module containing the Trainer class and related functions""" +import math +import os +from contextlib import contextmanager +from functools import partial +from typing import List + +import numpy as np +import torch +import torch.cuda +from accelerate.logging import get_logger +from datasets import set_caching_enabled +from torch.utils.data import DataLoader, RandomSampler + +from axolotl.core.trainer_builder import HFCausalTrainerBuilder, HFDPOTrainerBuilder +from axolotl.utils.distributed import is_main_process, reduce_and_broadcast, zero_first +from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths + +LOG = get_logger("axolotl") + + +@torch.jit.script +def weighted_cross_entropy( + logits: torch.Tensor, labels: torch.Tensor, weights: torch.Tensor +): + # Flatten the logits, labels, and weights tensors + logits = logits.view( + -1, logits.size(-1) + ) # logits becomes of shape [batch_size*sequence_length, vocab_size] + labels = labels.view(-1) # labels becomes of shape [batch_size*sequence_length] + weights = weights.view(-1) # weights becomes of shape [batch_size*sequence_length] + + # Compute the unweighted cross entropy loss + losses = torch.nn.functional.cross_entropy(logits, labels, reduction="none") + + # Apply the weights to the losses and compute their sum + return (weights * losses).sum() + + +@torch.jit.script +def create_weighted_mask(labels: torch.Tensor): + # Check if the tensor is 2D. 
If not, unsqueeze it to make it 2D + if len(labels.shape) == 1: + labels = labels.unsqueeze(0) + + weights = torch.zeros_like(labels).float() + for i in range(labels.shape[0]): + mask = labels[i] != -100 + + # Create a tensor to track group ids + group_ids = torch.zeros_like(labels[i]).int() + curr_group_id = 0 + + for j in range(1, len(labels[i])): + if mask[j] and not mask[j - 1]: # switch from masked to unmasked label + curr_group_id += 1 # start new group + group_ids[j] = ( + curr_group_id if mask[j] else 0 + ) # assign group id if unmasked label + + # Count only unmasked labels in each group + group_counts = torch.bincount(group_ids[mask]) + + mask_weights = torch.zeros_like(labels[i]).float() + mask_weights[mask] = 1.0 / group_counts[group_ids[mask]] + + weights[i] = mask_weights + + return weights.squeeze() # squeeze the output to match the input dimension + + +def trainer_weighted_loss(model_output, labels, shift_labels=True): + logits = ( + model_output["logits"] if isinstance(model_output, dict) else model_output[0] + ) + if shift_labels: + logits = logits[..., :-1, :].contiguous() + labels = labels[..., 1:].contiguous() + + weights = create_weighted_mask(labels) + return weighted_cross_entropy(logits, labels, weights) + + +@contextmanager +def disable_datasets_caching(): + try: + set_caching_enabled(False) + yield + finally: + set_caching_enabled(True) + + +def add_position_ids(sample): + sample_len = len(sample["input_ids"]) + sample["position_ids"] = torch.arange(len(sample["input_ids"])) + sample["length"] = sample_len + return sample + + +def add_length(sample): + sample["length"] = len(sample["input_ids"]) + return sample + + +def drop_long_seq(sample, sequence_len=2048): + return len(sample["input_ids"]) <= sequence_len and len(sample["input_ids"]) > 0 + + +def process_datasets_for_packing(cfg, train_dataset, eval_dataset): + drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len) + with zero_first(is_main_process()): + if cfg.is_preprocess: + min_input_len = np.min(get_dataset_lengths(train_dataset)) + LOG.debug(f"min_input_len: {min_input_len}", main_process_only=True) + max_input_len = np.max(get_dataset_lengths(train_dataset)) + LOG.debug(f"max_input_len: {max_input_len}", main_process_only=True) + + if ( + cfg.is_mistral_derived_model and cfg.flash_attention + ) or cfg.model_config_type == "mamba": + LOG.info("dropping attention_mask column") + train_dataset = train_dataset.remove_columns("attention_mask") + if eval_dataset: + eval_dataset = eval_dataset.remove_columns("attention_mask") + + if cfg.model_config_type == "falcon": + LOG.info("dropping token_type_ids column") + train_dataset = train_dataset.remove_columns("token_type_ids") + if eval_dataset: + eval_dataset = eval_dataset.remove_columns("token_type_ids") + + train_dataset = train_dataset.filter( + drop_long, + num_proc=cfg.dataset_processes, + load_from_cache_file=not cfg.is_preprocess, + desc="Dropping Long Sequences", + ) + if eval_dataset: + eval_dataset = eval_dataset.filter( + drop_long, + num_proc=cfg.dataset_processes, + load_from_cache_file=not cfg.is_preprocess, + desc="Dropping Long Sequences", + ) + + if cfg.group_by_length: + train_dataset = train_dataset.map( + add_length, + num_proc=cfg.dataset_processes, + load_from_cache_file=not cfg.is_preprocess, + desc="Group By Length", + ) + + if cfg.sample_packing: + train_dataset = train_dataset.map( + add_position_ids, + num_proc=cfg.dataset_processes, + load_from_cache_file=not cfg.is_preprocess, + desc="Add position_id column (Sample 
Packing)", + ) + if cfg.eval_sample_packing is not False: + if eval_dataset: + eval_dataset = eval_dataset.map( + add_position_ids, + num_proc=cfg.dataset_processes, + load_from_cache_file=not cfg.is_preprocess, + desc="Add position_id column (Sample Packing)", + ) + + return train_dataset, eval_dataset + + +def process_pretraining_datasets_for_packing(train_dataset, sequence_len): + drop_long = partial(drop_long_seq, sequence_len=sequence_len) + + train_dataset = train_dataset.filter( + drop_long, + desc="Dropping Long Sequences", + ) + train_dataset = train_dataset.map( + add_position_ids, + desc="Add position_id column (Pretraining Sample Packing)", + ) + return train_dataset + + +def calculate_total_num_steps(cfg, train_dataset, update=True): + if not cfg.total_num_tokens: + total_num_tokens = np.sum( + train_dataset.data.column("input_ids") + .to_pandas() + .apply(lambda x: len(x)) # pylint: disable=unnecessary-lambda + .values + ) + LOG.debug(f"total_num_tokens: {total_num_tokens}", main_process_only=True) + if update: + cfg.total_num_tokens = total_num_tokens + + skip_estimates = cfg.model_config_type == "mamba" + + if not skip_estimates and not cfg.total_supervised_tokens: + total_supervised_tokens = ( + train_dataset.data.column("labels") + .to_pandas() + .apply(lambda x: np.sum(np.array(x) != -100)) + .sum() + ) + LOG.debug( + f"`total_supervised_tokens: {total_supervised_tokens}`", + main_process_only=True, + ) + if update: + cfg.total_supervised_tokens = total_supervised_tokens + + if not skip_estimates and cfg.sample_packing: + # we have to drop anything longer then sequence len otherwise + # flash attention with position ids fails + + if cfg.sample_packing_eff_est: + total_num_steps = ( + # match count to len est in dataloader + ( + math.floor( + 0.99 + * cfg.total_num_tokens + / cfg.sample_packing_eff_est + / cfg.sequence_len + // cfg.batch_size + // int(os.environ.get("WORLD_SIZE", 1)) + ) + - 1 + ) + * cfg.num_epochs + ) + LOG.debug( + f"total_num_tokens: {cfg.total_num_tokens}, total_num_steps: {total_num_steps}", + main_process_only=True, + ) + else: + if cfg.flash_attention: + batch_size = 1 + batch_max_len = cfg.micro_batch_size * cfg.sequence_len + else: + batch_size = cfg.micro_batch_size + batch_max_len = cfg.sequence_len + sampler = MultipackBatchSampler( + sampler=RandomSampler(train_dataset), + batch_size=batch_size, + drop_last=True, + batch_max_len=batch_max_len, + lengths=get_dataset_lengths(train_dataset), + ) + + data_loader = DataLoader( + train_dataset.remove_columns(["length"]), + batch_sampler=sampler, + ) + data_loader_len = len(data_loader) // cfg.batch_size + actual_eff = sampler.efficiency() + LOG.debug(f"data_loader_len: {data_loader_len}", main_process_only=True) + # FIXME: is there a bug here somewhere? 
the total num steps depends + # on the agreed on value for sample_packing_eff_est + total_num_steps = int( + math.floor( + data_loader_len + * cfg.num_epochs + / int(os.environ.get("WORLD_SIZE", 1)) + ) + ) + + def calc_sample_packing_eff_est(estimates: List[float]): + LOG.info(f"sample_packing_eff_est across ranks: {repr(estimates)}") + return max(estimates) + + sample_packing_actual_eff_all = reduce_and_broadcast( + lambda: actual_eff, + calc_sample_packing_eff_est, + ) + sample_packing_eff_est = ( + math.ceil(sample_packing_actual_eff_all * 100.0) / 100.0 + ) + if update: + cfg.sample_packing_eff_est = sample_packing_eff_est + LOG.debug( + f"sample_packing_eff_est: {cfg.sample_packing_eff_est}", + main_process_only=True, + ) + else: + total_num_steps = int( + math.ceil( + len(train_dataset) + * cfg.num_epochs + / int(os.environ.get("WORLD_SIZE", 1)) + / cfg.batch_size + ) + ) + LOG.debug(f"total_num_steps: {total_num_steps}", main_process_only=True) + return total_num_steps + + +def setup_fsdp_envs(cfg): + os.environ["ACCELERATE_USE_FSDP"] = "true" + if cfg.fsdp_config.fsdp_offload_params: + os.environ["FSDP_OFFLOAD_PARAMS"] = "true" + if cfg.fsdp_config.fsdp_sync_module_states: + os.environ["FSDP_SYNC_MODULE_STATES"] = "true" + if cfg.fsdp_config.fsdp_cpu_ram_efficient_loading: + os.environ["FSDP_CPU_RAM_EFFICIENT_LOADING"] = "true" + if cfg.fsdp_config.fsdp_use_orig_params: + os.environ["FSDP_USE_ORIG_PARAMS"] = "true" + if cfg.fsdp_config.fsdp_state_dict_type: + os.environ["FSDP_STATE_DICT_TYPE"] = cfg.fsdp_config.fsdp_state_dict_type + if cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap: + os.environ[ + "FSDP_TRANSFORMER_CLS_TO_WRAP" + ] = cfg.fsdp_config.fsdp_transformer_layer_cls_to_wrap + + +def prepare_optim_env(cfg): + if cfg.fsdp: + setup_fsdp_envs(cfg) + elif cfg.deepspeed: + os.environ["ACCELERATE_USE_DEEPSPEED"] = "true" + os.environ["ACCELERATE_DEEPSPEED_CONFIG_FILE"] = cfg.deepspeed + + +def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_steps): + if cfg.rl in ["dpo", "ipo", "kto_pair"]: + trainer_builder = HFDPOTrainerBuilder(cfg, model[0], tokenizer) + trainer_builder.model_ref = model[1] + trainer_builder.peft_config = model[2] + else: + trainer_builder = HFCausalTrainerBuilder(cfg, model[0], tokenizer) + + trainer_builder.train_dataset = train_dataset + trainer_builder.eval_dataset = eval_dataset + + return trainer_builder.build(total_num_steps) diff --git a/src/axolotl/utils/wandb_.py b/src/axolotl/utils/wandb_.py new file mode 100644 index 0000000000000000000000000000000000000000..327dd9b6348159942de6d991cd7fd1730d2f5792 --- /dev/null +++ b/src/axolotl/utils/wandb_.py @@ -0,0 +1,21 @@ +"""Module for wandb utilities""" + +import os + +from axolotl.utils.dict import DictDefault + + +def setup_wandb_env_vars(cfg: DictDefault): + for key in cfg.keys(): + if key.startswith("wandb_"): + value = cfg.get(key, "") + + if value and isinstance(value, str) and len(value) > 0: + os.environ[key.upper()] = value + + # Enable wandb if project name is present + if cfg.wandb_project and len(cfg.wandb_project) > 0: + cfg.use_wandb = True + os.environ.pop("WANDB_DISABLED", None) # Remove if present + else: + os.environ["WANDB_DISABLED"] = "true" diff --git a/styles.css b/styles.css new file mode 100644 index 0000000000000000000000000000000000000000..2ddf50c7b4236e4b67c3e9fc369f6a7a562cd27d --- /dev/null +++ b/styles.css @@ -0,0 +1 @@ +/* css styles */ diff --git a/tests/core/test_trainer_builder.py b/tests/core/test_trainer_builder.py new file mode 100644 
index 0000000000000000000000000000000000000000..19042639f1a3df9b73b230f2d7a97dc399de56ff --- /dev/null +++ b/tests/core/test_trainer_builder.py @@ -0,0 +1,60 @@ +""" +unit tests for axolotl.core.trainer_builder +""" +import pytest + +from axolotl.core.trainer_builder import HFDPOTrainerBuilder +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import load_model, load_tokenizer + + +@pytest.fixture(name="cfg") +def fixture_cfg(): + return DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "model_type": "AutoModelForCausalLM", + "tokenizer_type": "LlamaTokenizer", + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "learning_rate": 0.00005, + "save_steps": 100, + "output_dir": "./model-out", + "warmup_steps": 10, + "gradient_checkpointing": False, + "optimizer": "adamw_torch", + "sequence_len": 2048, + "rl": True, + "adam_beta1": 0.998, + "adam_beta2": 0.9, + "adam_epsilon": 0.00001, + "dataloader_num_workers": 1, + "dataloader_pin_memory": True, + "model_config_type": "llama", + } + ) + + +@pytest.fixture(name="tokenizer") +def fixture_tokenizer(cfg): + return load_tokenizer(cfg) + + +@pytest.fixture(name="model") +def fixture_model(cfg, tokenizer): + return load_model(cfg, tokenizer) + + +class TestHFDPOTrainerBuilder: + """ + TestCase class for DPO trainer builder + """ + + def test_build_training_arguments(self, cfg, model, tokenizer): + builder = HFDPOTrainerBuilder(cfg, model, tokenizer) + training_arguments = builder.build_training_arguments(100) + assert training_arguments.adam_beta1 == 0.998 + assert training_arguments.adam_beta2 == 0.9 + assert training_arguments.adam_epsilon == 0.00001 + assert training_arguments.dataloader_num_workers == 1 + assert training_arguments.dataloader_pin_memory is True diff --git a/tests/e2e/.gitignore b/tests/e2e/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ad1727ec598318511c9b0620ec5a85e4a243243c --- /dev/null +++ b/tests/e2e/.gitignore @@ -0,0 +1 @@ +last_run_prepared diff --git a/tests/e2e/__init__.py b/tests/e2e/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/e2e/patched/__init__.py b/tests/e2e/patched/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/tests/e2e/patched/test_4d_multipack_llama.py b/tests/e2e/patched/test_4d_multipack_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..d74d097237f279da40e8615a02dfc6ba4adcd733 --- /dev/null +++ b/tests/e2e/patched/test_4d_multipack_llama.py @@ -0,0 +1,114 @@ +""" +E2E tests for multipack fft llama using 4d attention masks +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import require_torch_2_1_1, with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class Test4dMultipackLlama(unittest.TestCase): + """ + Test case for Llama models using 4d attention with multipack + """ + + @require_torch_2_1_1 + @with_temp_dir + def test_sdp_lora_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "flash_attention": False, + "sdp_attention": True, + 
"sample_packing": True, + "pad_to_sequence_len": True, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 16, + "lora_dropout": 0.05, + "lora_target_linear": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "fp16": True, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_torch_lora_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "flash_attention": False, + "sdp_attention": False, + "sample_packing": True, + "pad_to_sequence_len": True, + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 16, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "fp16": True, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() diff --git a/tests/e2e/patched/test_falcon_samplepack.py b/tests/e2e/patched/test_falcon_samplepack.py new file mode 100644 index 0000000000000000000000000000000000000000..ae6a4973918618cd66a5f4c78386dced05585388 --- /dev/null +++ b/tests/e2e/patched/test_falcon_samplepack.py @@ -0,0 +1,112 @@ +""" +E2E tests for falcon +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestFalconPatched(unittest.TestCase): + """ + Test case for Falcon models + """ + + @with_temp_dir + def test_qlora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "illuin/tiny-random-FalconForCausalLM", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "load_in_4bit": True, + "adapter": "qlora", + "lora_r": 16, + "lora_alpha": 32, + "lora_dropout": 0.1, + "lora_target_linear": True, + "lora_modules_to_save": ["word_embeddings", "lm_head"], + "val_set_size": 0.1, + "special_tokens": { + "bos_token": "<|endoftext|>", + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": 
"cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "illuin/tiny-random-FalconForCausalLM", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "val_set_size": 0.1, + "special_tokens": { + "bos_token": "<|endoftext|>", + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/patched/test_fused_llama.py b/tests/e2e/patched/test_fused_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..de1195c368e634e344c1df631b6d0e7df22c2a57 --- /dev/null +++ b/tests/e2e/patched/test_fused_llama.py @@ -0,0 +1,74 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestFusedLlama(unittest.TestCase): + """ + Test case for Llama models using Fused layers + """ + + @with_temp_dir + def test_fft_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "flash_attention": True, + "pad_to_sequence_len": True, + "flash_attn_fuse_qkv": True, + "flash_attn_fuse_mlp": True, + "sample_packing": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 10, + "save_steps": 5, + "eval_steps": 5, + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/patched/test_llama_s2_attention.py b/tests/e2e/patched/test_llama_s2_attention.py new file mode 100644 index 0000000000000000000000000000000000000000..f1d37eb3ca879d351a0562fa272e95888239dc76 --- /dev/null +++ 
b/tests/e2e/patched/test_llama_s2_attention.py @@ -0,0 +1,111 @@ +""" +E2E tests for llama w/ S2 attn +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestLlamaShiftedSparseAttention(unittest.TestCase): + """ + Test case for Llama models using S2 Attn + """ + + @with_temp_dir + def test_lora_s2_attn(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 16384, + "sample_packing": False, + "flash_attention": True, + "s2_attention": True, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 16, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "Yukang/LongAlpaca-12k", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 10, + "save_steps": 5, + "eval_steps": 5, + "bf16": "auto", + } + ) + + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_fft_s2_attn(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 16384, + "sample_packing": False, + "flash_attention": True, + "s2_attention": True, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "Yukang/LongAlpaca-12k", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 10, + "save_steps": 5, + "eval_steps": 5, + "bf16": "auto", + } + ) + + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py new file mode 100644 index 0000000000000000000000000000000000000000..f251f9b66165884e6b3da4e6fa12659e04c8d1c8 --- /dev/null +++ b/tests/e2e/patched/test_lora_llama_multipack.py @@ -0,0 +1,126 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +import pytest +from transformers.utils import is_auto_gptq_available, is_torch_bf16_gpu_available + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestLoraLlama(unittest.TestCase): 
+ """ + Test case for Llama models using LoRA w multipack + """ + + @with_temp_dir + def test_lora_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "sample_packing": True, + "flash_attention": True, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.2, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 8, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @pytest.mark.skipif(not is_auto_gptq_available(), reason="auto-gptq not available") + @with_temp_dir + def test_lora_gptq_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "TheBlokeAI/jackfram_llama-68m-GPTQ", + "model_type": "AutoModelForCausalLM", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "sample_packing": True, + "flash_attention": True, + "load_in_8bit": True, + "adapter": "lora", + "gptq": True, + "gptq_disable_exllama": True, + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "save_steps": 0.5, + "micro_batch_size": 8, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() diff --git a/tests/e2e/patched/test_mistral_samplepack.py b/tests/e2e/patched/test_mistral_samplepack.py new file mode 100644 index 0000000000000000000000000000000000000000..a56c530b219b19f2315c9868c4c19780eb27953b --- /dev/null +++ b/tests/e2e/patched/test_mistral_samplepack.py @@ -0,0 +1,113 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestMistral(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_lora_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "openaccess-ai-collective/tiny-mistral", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 1024, + "load_in_8bit": 
True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft_packing(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "openaccess-ai-collective/tiny-mistral", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/patched/test_mixtral_samplepack.py b/tests/e2e/patched/test_mixtral_samplepack.py new file mode 100644 index 0000000000000000000000000000000000000000..4c05113f55e0d796b2a7b4035b0b51768c04d769 --- /dev/null +++ b/tests/e2e/patched/test_mixtral_samplepack.py @@ -0,0 +1,111 @@ +""" +E2E tests for mixtral +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestMixtral(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_qlora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "load_in_4bit": True, + "adapter": "qlora", + "lora_r": 16, + "lora_alpha": 32, + "lora_dropout": 0.1, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = 
load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert ( + "MixtralFlashAttention2" + in model.model.layers[0].self_attn.__class__.__name__ + ) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/patched/test_model_patches.py b/tests/e2e/patched/test_model_patches.py new file mode 100644 index 0000000000000000000000000000000000000000..8384b826f1a9b32fa0db2bd38058d6843e1dd5eb --- /dev/null +++ b/tests/e2e/patched/test_model_patches.py @@ -0,0 +1,95 @@ +""" +E2E smoke tests to check that the monkeypatches are in place for certain configurations +""" + +import unittest + +from axolotl.common.cli import TrainerCliArgs +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import load_model, load_tokenizer + +from ..utils import with_temp_dir + + +class TestModelPatches(unittest.TestCase): + """ + TestCases for the multipack monkey patches + """ + + @with_temp_dir + def test_mixtral_multipack(self, temp_dir): + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + tokenizer = load_tokenizer(cfg) + model, _ = load_model(cfg, tokenizer, inference=cli_args.inference) + + assert ( + "MixtralFlashAttention2" + in model.model.layers[0].self_attn.__class__.__name__ + ) + + @with_temp_dir + def test_mistral_multipack(self, temp_dir): + cfg = DictDefault( + { + "base_model": "openaccess-ai-collective/tiny-mistral", + "flash_attention": True, + "sample_packing": True, + "sequence_len": 2048, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + cli_args = 
TrainerCliArgs() + tokenizer = load_tokenizer(cfg) + model, _ = load_model(cfg, tokenizer, inference=cli_args.inference) + + assert ( + "axolotl.monkeypatch.mistral_attn_hijack_flash" + in model.model.layers[0].self_attn.forward.__module__ + ) diff --git a/tests/e2e/patched/test_phi_multipack.py b/tests/e2e/patched/test_phi_multipack.py new file mode 100644 index 0000000000000000000000000000000000000000..5f30453c18b816861c2d98b4349f5dbde0362860 --- /dev/null +++ b/tests/e2e/patched/test_phi_multipack.py @@ -0,0 +1,123 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestPhiMultipack(unittest.TestCase): + """ + Test case for Phi2 models + """ + + @with_temp_dir + def test_ft_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "microsoft/phi-1_5", + "model_type": "PhiForCausalLM", + "tokenizer_type": "AutoTokenizer", + "sequence_len": 1024, + "sample_packing": True, + "flash_attention": True, + "pad_to_sequence_len": True, + "load_in_8bit": False, + "adapter": None, + "val_set_size": 0.1, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "dataset_shard_num": 10, + "dataset_shard_idx": 0, + "num_epochs": 1, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "eval_steps": 10, + "save_steps": 10, + "bf16": "auto", + } + ) + + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() + + @with_temp_dir + def test_qlora_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "microsoft/phi-1_5", + "model_type": "PhiForCausalLM", + "tokenizer_type": "AutoTokenizer", + "sequence_len": 1024, + "sample_packing": True, + "flash_attention": True, + "pad_to_sequence_len": True, + "load_in_8bit": False, + "adapter": "qlora", + "lora_r": 64, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "dataset_shard_num": 10, + "dataset_shard_idx": 0, + "num_epochs": 1, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "eval_steps": 10, + "save_steps": 10, + "bf16": "auto", + } + ) + + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() diff --git a/tests/e2e/patched/test_resume.py b/tests/e2e/patched/test_resume.py new file mode 100644 index 
0000000000000000000000000000000000000000..dfe9e86252d7bccfaf8cf98d7c0d9e762e9aa774 --- /dev/null +++ b/tests/e2e/patched/test_resume.py @@ -0,0 +1,95 @@ +""" +E2E tests for resuming training +""" + +import logging +import os +import re +import subprocess +import unittest +from pathlib import Path + +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from ..utils import most_recent_subdir, with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestResumeLlama(unittest.TestCase): + """ + Test case for resuming training of llama models + """ + + @with_temp_dir + def test_resume_qlora_packed(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "sample_packing": True, + "flash_attention": True, + "load_in_4bit": True, + "adapter": "qlora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "vicgalle/alpaca-gpt4", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "save_steps": 10, + "save_total_limit": 5, + "max_steps": 40, + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + + resume_cfg = cfg | DictDefault( + { + "resume_from_checkpoint": f"{temp_dir}/checkpoint-30/", + } + ) + normalize_config(resume_cfg) + cli_args = TrainerCliArgs() + + train(cfg=resume_cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + tb_log_path_1 = most_recent_subdir(temp_dir + "/runs") + cmd = f"tensorboard --inspect --logdir {tb_log_path_1}" + res = subprocess.run( + cmd, shell=True, text=True, capture_output=True, check=True + ) + pattern = r"first_step\s+(\d+)" + first_steps = int(re.findall(pattern, res.stdout)[0]) + assert first_steps == 31 diff --git a/tests/e2e/test_dpo.py b/tests/e2e/test_dpo.py new file mode 100644 index 0000000000000000000000000000000000000000..e28df7411f645cbd94843ceb9ccf6caeb0b2503c --- /dev/null +++ b/tests/e2e/test_dpo.py @@ -0,0 +1,160 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +import pytest + +from axolotl.cli import load_rl_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +@pytest.mark.skip(reason="doesn't seem to work on modal") +class TestDPOLlamaLora(unittest.TestCase): + """ + Test case for DPO Llama models using LoRA + """ + + @with_temp_dir + def test_dpo_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": 
"LlamaTokenizer", + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 64, + "lora_alpha": 32, + "lora_dropout": 0.1, + "lora_target_linear": True, + "special_tokens": {}, + "rl": "dpo", + "datasets": [ + { + "path": "Intel/orca_dpo_pairs", + "type": "chatml.intel", + "split": "train", + }, + ], + "num_epochs": 1, + "micro_batch_size": 4, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "paged_adamw_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "warmup_steps": 5, + "gradient_checkpointing": True, + "gradient_checkpointing_kwargs": {"use_reentrant": True}, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "checkpoint-20/adapter_model.safetensors").exists() + + @with_temp_dir + def test_kto_pair_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 64, + "lora_alpha": 32, + "lora_dropout": 0.1, + "lora_target_linear": True, + "special_tokens": {}, + "rl": "kto_pair", + "datasets": [ + { + "path": "Intel/orca_dpo_pairs", + "type": "chatml.intel", + "split": "train", + }, + ], + "num_epochs": 1, + "micro_batch_size": 4, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "paged_adamw_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "warmup_steps": 5, + "gradient_checkpointing": True, + "gradient_checkpointing_kwargs": {"use_reentrant": True}, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "checkpoint-20/adapter_model.safetensors").exists() + + @with_temp_dir + def test_ipo_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 64, + "lora_alpha": 32, + "lora_dropout": 0.1, + "lora_target_linear": True, + "special_tokens": {}, + "rl": "ipo", + "datasets": [ + { + "path": "Intel/orca_dpo_pairs", + "type": "chatml.intel", + "split": "train", + }, + ], + "num_epochs": 1, + "micro_batch_size": 4, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "paged_adamw_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "warmup_steps": 5, + "gradient_checkpointing": True, + "gradient_checkpointing_kwargs": {"use_reentrant": True}, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_rl_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "checkpoint-20/adapter_model.safetensors").exists() diff --git a/tests/e2e/test_falcon.py b/tests/e2e/test_falcon.py new file mode 100644 index 0000000000000000000000000000000000000000..c76699a7c86e373d41da46396b07267bdb6e6b3f --- /dev/null +++ b/tests/e2e/test_falcon.py @@ -0,0 +1,166 @@ +""" +E2E tests for falcon +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli 
import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestFalcon(unittest.TestCase): + """ + Test case for falcon + """ + + @with_temp_dir + def test_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "illuin/tiny-random-FalconForCausalLM", + "flash_attention": True, + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "lora_modules_to_save": [ + "word_embeddings", + "lm_head", + ], + "val_set_size": 0.1, + "special_tokens": { + "bos_token": "<|endoftext|>", + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_lora_added_vocab(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "illuin/tiny-random-FalconForCausalLM", + "flash_attention": True, + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "lora_modules_to_save": [ + "word_embeddings", + "lm_head", + ], + "val_set_size": 0.1, + "special_tokens": { + "bos_token": "<|endoftext|>", + "pad_token": "<|endoftext|>", + }, + "tokens": [ + "<|im_start|>", + "<|im_end|>", + ], + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "illuin/tiny-random-FalconForCausalLM", + "flash_attention": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "special_tokens": { + "bos_token": "<|endoftext|>", + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + 
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/test_lora_llama.py b/tests/e2e/test_lora_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..c79652bef76c8682381cac7ad9fe9eceee9464e3 --- /dev/null +++ b/tests/e2e/test_lora_llama.py @@ -0,0 +1,67 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestLoraLlama(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 8, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py new file mode 100644 index 0000000000000000000000000000000000000000..8755fa4d51fcf45c678d6f84e94e0b82a8367b65 --- /dev/null +++ b/tests/e2e/test_mamba.py @@ -0,0 +1,68 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +import pytest + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +@pytest.mark.skip(reason="skipping until upstreamed into transformers") +class TestMamba(unittest.TestCase): + """ + Test case for Mamba models + """ + + @with_temp_dir + def test_fft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "state-spaces/mamba-130m", + "model_type": "MambaLMHeadModel", + "tokenizer_type": "AutoTokenizer", + "tokenizer_config": "EleutherAI/gpt-neox-20b", + "flash_attention": False, + "sequence_len": 1024, + "load_in_8bit": False, + "val_set_size": 0.0, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "gradient_checkpointing": False, + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": None, + "save_safetensors": False, + } + ) + 
normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/test_mistral.py b/tests/e2e/test_mistral.py new file mode 100644 index 0000000000000000000000000000000000000000..57d85e51eb9137f3032757f7da8972e9b69f779e --- /dev/null +++ b/tests/e2e/test_mistral.py @@ -0,0 +1,115 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestMistral(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_lora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "openaccess-ai-collective/tiny-mistral", + "flash_attention": True, + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": "lora", + "lora_r": 32, + "lora_alpha": 64, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "openaccess-ai-collective/tiny-mistral", + "flash_attention": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "special_tokens": { + "unk_token": "", + "bos_token": "", + "eos_token": "", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/test_mixtral.py b/tests/e2e/test_mixtral.py new file mode 100644 index 0000000000000000000000000000000000000000..68afcdec4a1d7a01dd42378293d1404bdc8ab354 --- /dev/null +++ b/tests/e2e/test_mixtral.py @@ -0,0 +1,290 @@ +""" +E2E tests for mixtral +""" + +import logging +import os +import unittest +from pathlib import Path + +import torch +from transformers.utils import is_torch_bf16_gpu_available + +from axolotl.cli 
import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestMixtral(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_qlora_w_fa2(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sequence_len": 1024, + "load_in_4bit": True, + "adapter": "qlora", + "lora_r": 4, + "lora_alpha": 8, + "lora_dropout": 0.1, + "lora_target_modules": [ + "o_proj", + "w3", + "k_proj", + "v_proj", + "w1", + "q_proj", + "w2", + ], + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert ( + model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype + == torch.float32 + ) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_qlora_wo_fa2(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": False, + "sequence_len": 1024, + "load_in_4bit": True, + "adapter": "qlora", + "lora_r": 4, + "lora_alpha": 8, + "lora_dropout": 0.1, + "lora_target_modules": [ + "o_proj", + "w3", + "k_proj", + "v_proj", + "w1", + "q_proj", + "w2", + ], + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert ( + model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype + == torch.float32 + ) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_16bit_lora_w_fa2(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sequence_len": 1024, + "adapter": "lora", + "lora_r": 4, + "lora_alpha": 8, + "lora_dropout": 0.1, + "lora_target_modules": [ + "o_proj", + "w3", + "k_proj", + "v_proj", + "w1", + "q_proj", + "w2", + ], + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + 
"gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert ( + model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype + == torch.float32 + ) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_16bit_lora_wo_fa2(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": False, + "sequence_len": 1024, + "adapter": "lora", + "lora_r": 4, + "lora_alpha": 8, + "lora_dropout": 0.1, + "lora_target_modules": [ + "o_proj", + "w3", + "k_proj", + "v_proj", + "w1", + "q_proj", + "w2", + ], + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + normalize_config(cfg) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + model, _ = train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert ( + model.base_model.model.model.layers[0].block_sparse_moe.gate.weight.dtype + == torch.float32 + ) + assert (Path(temp_dir) / "adapter_model.bin").exists() + + @with_temp_dir + def test_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "hf-internal-testing/Mixtral-tiny", + "tokenizer_config": "mistralai/Mixtral-8x7B-v0.1", + "flash_attention": True, + "sequence_len": 1024, + "val_set_size": 0.1, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "num_epochs": 2, + "micro_batch_size": 2, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_bnb_8bit", + "lr_scheduler": "cosine", + "max_steps": 20, + "save_steps": 10, + "eval_steps": 10, + } + ) + if is_torch_bf16_gpu_available(): + cfg.bf16 = True + else: + cfg.fp16 = True + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() diff --git a/tests/e2e/test_phi.py b/tests/e2e/test_phi.py new file mode 100644 index 0000000000000000000000000000000000000000..7abed8594538b101badc38832625b6567b0fa496 --- /dev/null +++ b/tests/e2e/test_phi.py @@ -0,0 +1,122 @@ +""" +E2E tests for lora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +import pytest + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = 
logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +@pytest.mark.skip(reason="doesn't seem to work on modal") +class TestPhi(unittest.TestCase): + """ + Test case for Phi2 models + """ + + @with_temp_dir + def test_phi_ft(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "microsoft/phi-1_5", + "model_type": "AutoModelForCausalLM", + "tokenizer_type": "AutoTokenizer", + "sequence_len": 2048, + "sample_packing": False, + "load_in_8bit": False, + "adapter": None, + "val_set_size": 0.1, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "dataset_shard_num": 10, + "dataset_shard_idx": 0, + "num_epochs": 1, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "paged_adamw_8bit", + "lr_scheduler": "cosine", + "flash_attention": True, + "max_steps": 10, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "pytorch_model.bin").exists() + + @with_temp_dir + def test_phi_qlora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "microsoft/phi-1_5", + "model_type": "AutoModelForCausalLM", + "tokenizer_type": "AutoTokenizer", + "sequence_len": 2048, + "sample_packing": False, + "load_in_8bit": False, + "adapter": "qlora", + "lora_r": 64, + "lora_alpha": 32, + "lora_dropout": 0.05, + "lora_target_linear": True, + "val_set_size": 0.1, + "special_tokens": { + "pad_token": "<|endoftext|>", + }, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "dataset_shard_num": 10, + "dataset_shard_idx": 0, + "num_epochs": 1, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "paged_adamw_8bit", + "lr_scheduler": "cosine", + "flash_attention": True, + "max_steps": 10, + "save_steps": 10, + "eval_steps": 10, + "bf16": "auto", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "adapter_model.bin").exists() diff --git a/tests/e2e/test_relora_llama.py b/tests/e2e/test_relora_llama.py new file mode 100644 index 0000000000000000000000000000000000000000..4ba130c9dcae876770d63520c2220f2c4efa8180 --- /dev/null +++ b/tests/e2e/test_relora_llama.py @@ -0,0 +1,68 @@ +""" +E2E tests for relora llama +""" + +import logging +import os +import unittest +from pathlib import Path + +from axolotl.cli import load_datasets +from axolotl.common.cli import TrainerCliArgs +from axolotl.train import train +from axolotl.utils.config import normalize_config +from axolotl.utils.dict import DictDefault + +from .utils import with_temp_dir + +LOG = logging.getLogger("axolotl.tests.e2e") +os.environ["WANDB_DISABLED"] = "true" + + +class TestReLoraLlama(unittest.TestCase): + """ + Test case for Llama models using LoRA + """ + + @with_temp_dir + def test_relora(self, temp_dir): + # pylint: disable=duplicate-code + cfg = DictDefault( + { + "base_model": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "sequence_len": 1024, + "load_in_8bit": True, + "adapter": 
"lora", + "lora_r": 32, + "lora_alpha": 16, + "lora_dropout": 0.05, + "lora_target_modules": ["q_proj", "v_proj"], + "relora_steps": 25, + "relora_warmup_steps": 5, + "relora_anneal_steps": 5, + "relora_cpu_offload": True, + "val_set_size": 0.0, + "special_tokens": {}, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + }, + ], + "warmup_steps": 15, + "num_epochs": 2, + "micro_batch_size": 4, + "gradient_accumulation_steps": 1, + "output_dir": temp_dir, + "learning_rate": 0.00001, + "optimizer": "adamw_torch", + "lr_scheduler": "cosine", + } + ) + normalize_config(cfg) + cli_args = TrainerCliArgs() + dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) + + train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) + assert (Path(temp_dir) / "model.safetensors").exists() diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..837b4734fcaf3ace0f8720cf25175d636a87bc07 --- /dev/null +++ b/tests/e2e/utils.py @@ -0,0 +1,47 @@ +""" +helper utils for tests +""" +import os +import shutil +import tempfile +import unittest +from functools import wraps +from importlib.metadata import version +from pathlib import Path + + +def with_temp_dir(test_func): + @wraps(test_func) + def wrapper(*args, **kwargs): + # Create a temporary directory + temp_dir = tempfile.mkdtemp() + try: + # Pass the temporary directory to the test function + test_func(*args, temp_dir=temp_dir, **kwargs) + finally: + # Clean up the directory after the test + shutil.rmtree(temp_dir) + + return wrapper + + +def most_recent_subdir(path): + base_path = Path(path) + subdirectories = [d for d in base_path.iterdir() if d.is_dir()] + if not subdirectories: + return None + subdir = max(subdirectories, key=os.path.getctime) + + return subdir + + +def require_torch_2_1_1(test_case): + """ + Decorator marking a test that requires torch >= 2.1.1 + """ + + def is_min_2_1_1(): + torch_version = version("torch") + return torch_version >= "2.1.1" + + return unittest.skipUnless(is_min_2_1_1(), "test torch 2.1.1")(test_case) diff --git a/tests/fixtures/alpaca/alpaca.json b/tests/fixtures/alpaca/alpaca.json new file mode 100644 index 0000000000000000000000000000000000000000..912643d17fa230421cc64f1a1326a12b03ff4f4e --- /dev/null +++ b/tests/fixtures/alpaca/alpaca.json @@ -0,0 +1,12 @@ +[ + { + "instruction": "You will be given a series of words. Output these words in reverse order, with each word on its own line.", + "input": "Words: ['Hello', 'world'].", + "output": "['world', 'Hello']" + }, + { + "instruction": "In this task, you're given a short description of an event. Your job is to order the steps involved in the event from first to last. Note that there may be multiple correct answers for each event.", + "input": "Description: A man walks into a bar and orders a drink. He pays for his drink and leaves the bar.", + "output": "1. The man walks into the bar.\n2. He orders a drink.\n3. He pays for his drink.\n4. He leaves the bar." + } +] diff --git a/tests/fixtures/conversation.json b/tests/fixtures/conversation.json new file mode 100644 index 0000000000000000000000000000000000000000..bffb118a4196fcaa2e629c59c51e4c2c4e127c52 --- /dev/null +++ b/tests/fixtures/conversation.json @@ -0,0 +1,52 @@ +{ + "conversations": [ + { + "from": "human", + "value": "how could i implement a minesweeper algorithm that utilises algebraic topology to solve boards?" 
+ }, + { + "from": "gpt", + "value": "It's possible to use algebraic topology to solve Minesweeper, but it would likely be quite complex. Algebraic topology is a branch of mathematics that studies the properties of spaces that are preserved under continuous deformations, such as stretching and bending. In the context of Minesweeper, this could be used to analyze the structure of the game board and determine the locations of mines based on certain topological invariants. However, implementing this would require a deep understanding of both algebraic topology and the game of Minesweeper. It's not something that can be explained in a simple answer." + }, + { + "from": "human", + "value": "could you give me the gist of how it could though?" + }, + { + "from": "gpt", + "value": "Sure. The basic idea would be to use algebraic topology to analyze the structure of the Minesweeper game board and determine the locations of mines based on certain topological invariants. This could involve representing the game board as a topological space and using topological invariants such as homotopy groups and Betti numbers to infer information about the location of mines on the board. However, as I mentioned before, implementing this would require a deep understanding of both algebraic topology and the game of Minesweeper, so it's not something that can be explained in a simple answer." + }, + { + "from": "human", + "value": "give me a specific example" + }, + { + "from": "gpt", + "value": "As a specific example, suppose we have a Minesweeper game board with the following configuration:\n```\n 1 1 1 1 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 1 1 1 1\n```\nIn this case, the game board can be represented as a topological space, where each square on the board corresponds to a point in the space. The mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space. Using algebraic topology, we can analyze the structure of this space and determine the locations of the mines based on certain topological invariants.\n\nFor instance, one way to do this is to compute the homotopy groups of the space. Homotopy groups are a way of describing the holes in a space, and they can be used to infer information about the locations of the mines on the Minesweeper board. In the case of the board above, we can compute the first homotopy group of the space, which describes the number and \"shape\" of the holes in the space. In this case, the first homotopy group would tell us that there are four holes in the space, arranged in a square shape.\n\nBased on this information, we can infer that the mines in the game are located in the four corners of the board, as shown below:\n```\n 1 1 1 1 1\n 1 M ? ? 1\n 1 ? ? ? 1\n 1 ? ? M 1\n 1 1 1 1 1\n```\nOf course, this is just one example of how algebraic topology could be used to solve Minesweeper. There are many other possible approaches, and the specific details of how to implement them would depend on the specific game board and the topological invariants being used. As I mentioned before, implementing a solution like this would require a deep understanding of both algebraic topology and the game of Minesweeper." + }, + { + "from": "human", + "value": "assuming i was someone with a deep understanding of both, could you explain the same concept to me with that assumption?" + }, + { + "from": "gpt", + "value": "Sure. 
Assuming you have a deep understanding of both algebraic topology and the game of Minesweeper, here is how you might use algebraic topology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Use topological invariants to infer information about the locations of the mines on the board. There are many possible topological invariants that could be used for this, such as homotopy groups, Betti numbers, or persistent homology.\n3. Use the information inferred from the topological invariants to determine the locations of the mines on the board. For example, if the first homotopy group of the space indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using algebraic topology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the topological invariants being used. However, this should give you a general idea of how algebraic topology can be used to solve Minesweeper." + }, + { + "from": "human", + "value": "how would i do it with persistent homology or any of the other topological invariants?" + }, + { + "from": "gpt", + "value": "To use persistent homology or any of the other topological invariants to solve a Minesweeper game board, you would follow the same basic steps as described above, but with some differences in the specific details.\n\nFor instance, here is how you might use persistent homology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Compute the persistent homology of the space. Persistent homology is a way of describing the holes in a space over time, and it can be used to infer information about the locations of the mines on the Minesweeper board.\n3. Use the information from the persistent homology to determine the locations of the mines on the board. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using persistent homology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the specific details of the persistent homology computation. However, this should give you a general idea of how persistent homology can be used to solve Minesweeper." + }, + { + "from": "human", + "value": "how could i implement this in python?" + }, + { + "from": "gpt", + "value": "To implement a Minesweeper solver that uses algebraic topology in Python, you would need to do the following:\n\n1. Install a Python package for algebraic topology. 
There are several options available, such as Gudhi, Dionysus, or scikit-tda. These packages provide algorithms and data structures for computing topological invariants, such as homotopy groups, Betti numbers, or persistent homology.\n2. Represent the Minesweeper game board as a topological space. This could be done using a data structure provided by the algebraic topology package, such as a simplicial complex or a Vietoris-Rips complex. Each square on the board would correspond to a point in the space, and the mines on the board would be represented as \"holes\" in the space.\n3. Compute the topological invariants of the space. This could be done using algorithms provided by the algebraic topology package. For example, you could compute the persistent homology of the space using the persistent homology algorithm.\n4. Use the information from the topological invariants to determine the locations of the mines on the board. This could be done using a combination of mathematical reasoning and programming. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then you could use a piece of code to infer that the mines are located in the four corners of the board.\n5. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you could use a piece of code to automatically click on the safe squares and clear the board.\n\nOf course, this is just one possible approach to implementing a Minesweeper solver that uses algebraic topology in Python. There may be other ways to do it, depending on the specific details of the implementation. However, this should give you a general idea of how to get started with such a project." + } + ] +} diff --git a/tests/fixtures/conversation.missingturns.json b/tests/fixtures/conversation.missingturns.json new file mode 100644 index 0000000000000000000000000000000000000000..b5b962d859bc94804885d2fbd6aca0910413b4c3 --- /dev/null +++ b/tests/fixtures/conversation.missingturns.json @@ -0,0 +1,52 @@ +{ + "conversations": [ + { + "from": "human", + "value": "how could i implement a minesweeper algorithm that utilises algebraic topology to solve boards?" + }, + { + "from": "gpt", + "value": "" + }, + { + "from": "human", + "value": "could you give me the gist of how it could though?" + }, + { + "from": "gpt", + "value": "Sure. The basic idea would be to use algebraic topology to analyze the structure of the Minesweeper game board and determine the locations of mines based on certain topological invariants. This could involve representing the game board as a topological space and using topological invariants such as homotopy groups and Betti numbers to infer information about the location of mines on the board. However, as I mentioned before, implementing this would require a deep understanding of both algebraic topology and the game of Minesweeper, so it's not something that can be explained in a simple answer." + }, + { + "from": "human", + "value": "give me a specific example" + }, + { + "from": "gpt", + "value": "As a specific example, suppose we have a Minesweeper game board with the following configuration:\n```\n 1 1 1 1 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 ? ? ? 1\n 1 1 1 1 1\n```\nIn this case, the game board can be represented as a topological space, where each square on the board corresponds to a point in the space. The mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space. 
Using algebraic topology, we can analyze the structure of this space and determine the locations of the mines based on certain topological invariants.\n\nFor instance, one way to do this is to compute the homotopy groups of the space. Homotopy groups are a way of describing the holes in a space, and they can be used to infer information about the locations of the mines on the Minesweeper board. In the case of the board above, we can compute the first homotopy group of the space, which describes the number and \"shape\" of the holes in the space. In this case, the first homotopy group would tell us that there are four holes in the space, arranged in a square shape.\n\nBased on this information, we can infer that the mines in the game are located in the four corners of the board, as shown below:\n```\n 1 1 1 1 1\n 1 M ? ? 1\n 1 ? ? ? 1\n 1 ? ? M 1\n 1 1 1 1 1\n```\nOf course, this is just one example of how algebraic topology could be used to solve Minesweeper. There are many other possible approaches, and the specific details of how to implement them would depend on the specific game board and the topological invariants being used. As I mentioned before, implementing a solution like this would require a deep understanding of both algebraic topology and the game of Minesweeper." + }, + { + "from": "human", + "value": "assuming i was someone with a deep understanding of both, could you explain the same concept to me with that assumption?" + }, + { + "from": "gpt", + "value": "Sure. Assuming you have a deep understanding of both algebraic topology and the game of Minesweeper, here is how you might use algebraic topology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Use topological invariants to infer information about the locations of the mines on the board. There are many possible topological invariants that could be used for this, such as homotopy groups, Betti numbers, or persistent homology.\n3. Use the information inferred from the topological invariants to determine the locations of the mines on the board. For example, if the first homotopy group of the space indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using algebraic topology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the topological invariants being used. However, this should give you a general idea of how algebraic topology can be used to solve Minesweeper." + }, + { + "from": "human", + "value": "how would i do it with persistent homology or any of the other topological invariants?" + }, + { + "from": "gpt", + "value": "To use persistent homology or any of the other topological invariants to solve a Minesweeper game board, you would follow the same basic steps as described above, but with some differences in the specific details.\n\nFor instance, here is how you might use persistent homology to solve a Minesweeper game board:\n\n1. Represent the Minesweeper game board as a topological space. 
Each square on the board corresponds to a point in the space, and the mines on the board can be thought of as \"holes\" in the space, which are points that are not part of the space.\n2. Compute the persistent homology of the space. Persistent homology is a way of describing the holes in a space over time, and it can be used to infer information about the locations of the mines on the Minesweeper board.\n3. Use the information from the persistent homology to determine the locations of the mines on the board. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then we can infer that the mines are located in the four corners of the board.\n4. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you can use this information to avoid them and clear the board.\n\nOf course, this is just one possible approach to using persistent homology to solve Minesweeper. There may be other ways to do it, depending on the specific game board and the specific details of the persistent homology computation. However, this should give you a general idea of how persistent homology can be used to solve Minesweeper." + }, + { + "from": "human", + "value": "how could i implement this in python?" + }, + { + "from": "gpt", + "value": "To implement a Minesweeper solver that uses algebraic topology in Python, you would need to do the following:\n\n1. Install a Python package for algebraic topology. There are several options available, such as Gudhi, Dionysus, or scikit-tda. These packages provide algorithms and data structures for computing topological invariants, such as homotopy groups, Betti numbers, or persistent homology.\n2. Represent the Minesweeper game board as a topological space. This could be done using a data structure provided by the algebraic topology package, such as a simplicial complex or a Vietoris-Rips complex. Each square on the board would correspond to a point in the space, and the mines on the board would be represented as \"holes\" in the space.\n3. Compute the topological invariants of the space. This could be done using algorithms provided by the algebraic topology package. For example, you could compute the persistent homology of the space using the persistent homology algorithm.\n4. Use the information from the topological invariants to determine the locations of the mines on the board. This could be done using a combination of mathematical reasoning and programming. For example, if the persistent homology indicates that there are four holes in the space, arranged in a square shape, then you could use a piece of code to infer that the mines are located in the four corners of the board.\n5. Use this information to solve the Minesweeper game. Once you know the locations of the mines, you could use a piece of code to automatically click on the safe squares and clear the board.\n\nOf course, this is just one possible approach to implementing a Minesweeper solver that uses algebraic topology in Python. There may be other ways to do it, depending on the specific details of the implementation. However, this should give you a general idea of how to get started with such a project." 
+ } + ] +} diff --git a/tests/fixtures/conversation.tokenized.json b/tests/fixtures/conversation.tokenized.json new file mode 100644 index 0000000000000000000000000000000000000000..4b7334cc154310025e28325e249f81ab0ac65652 --- /dev/null +++ b/tests/fixtures/conversation.tokenized.json @@ -0,0 +1 @@ +{"input_ids": [1, 319, 13563, 1546, 263, 12758, 1404, 322, 385, 23116, 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 1404, 29915, 29879, 5155, 29889, 29871, 3148, 1001, 29901, 920, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 29871, 319, 1799, 9047, 13566, 29901, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, 3148, 1001, 29901, 2367, 592, 263, 2702, 1342, 29871, 319, 1799, 9047, 13566, 29901, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 
310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 29871, 319, 1799, 9047, 13566, 29901, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 723, 474, 
437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, 3148, 1001, 29901, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 29871, 319, 1799, 9047, 13566, 29901, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 
278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2], "attention_mask": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 
29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 
920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 21106, 29879, 29958, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 
26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 21106, 29879, 29958, 2]} diff --git a/tests/fixtures/conversation.tokenized_llama2chat.json b/tests/fixtures/conversation.tokenized_llama2chat.json new file mode 100644 index 0000000000000000000000000000000000000000..a4109fc3e206a455b02f9607c7f325afa71435cb --- /dev/null +++ b/tests/fixtures/conversation.tokenized_llama2chat.json @@ -0,0 +1 @@ +{"input_ids": [1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492, 526, 263, 8444, 29892, 3390, 1319, 322, 15993, 20255, 29889, 29849, 1234, 408, 1371, 3730, 408, 1950, 29892, 1550, 1641, 9109, 29889, 3575, 6089, 881, 451, 3160, 738, 10311, 1319, 29892, 443, 621, 936, 29892, 11021, 391, 29892, 7916, 391, 29892, 304, 27375, 29892, 18215, 29892, 470, 27302, 2793, 29889, 3529, 9801, 393, 596, 20890, 526, 5374, 635, 443, 5365, 1463, 322, 6374, 297, 5469, 29889, 13, 13, 3644, 263, 1139, 947, 451, 1207, 738, 4060, 29892, 470, 338, 451, 2114, 1474, 16165, 261, 296, 29892, 5649, 2020, 2012, 310, 22862, 1554, 451, 1959, 29889, 960, 366, 1016, 29915, 29873, 1073, 278, 1234, 304, 263, 1139, 29892, 3113, 1016, 29915, 29873, 6232, 2089, 2472, 29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 3525, 1033, 474, 2334, 263, 29086, 705, 11356, 5687, 393, 3667, 4637, 21531, 20159, 304, 4505, 1045, 3163, 29973, 518, 29914, 25580, 29962, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 29871, 2, 1, 518, 25580, 29962, 1033, 366, 2367, 592, 278, 330, 391, 310, 920, 372, 1033, 2466, 29973, 518, 29914, 25580, 29962, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 29871, 2, 1, 518, 25580, 29962, 2367, 592, 263, 2702, 1342, 518, 29914, 25580, 29962, 1094, 263, 2702, 1342, 
29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 29871, 2, 1, 518, 25580, 29962, 10241, 474, 471, 4856, 411, 263, 6483, 8004, 310, 1716, 29892, 1033, 366, 5649, 278, 1021, 6964, 304, 592, 411, 393, 11833, 29973, 518, 29914, 25580, 29962, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 
393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 29871, 2, 1, 518, 25580, 29962, 920, 723, 474, 437, 372, 411, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 29973, 518, 29914, 25580, 29962, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 29871, 2, 1, 518, 25580, 29962, 920, 1033, 474, 2334, 445, 297, 3017, 29973, 518, 29914, 25580, 29962, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 
13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 29889, 29871, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], "labels": [-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 739, 29915, 29879, 1950, 304, 671, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29892, 541, 372, 723, 5517, 367, 3755, 4280, 29889, 838, 5869, 293, 20159, 338, 263, 5443, 310, 23964, 393, 11898, 278, 4426, 310, 8162, 393, 526, 21634, 1090, 9126, 316, 689, 800, 29892, 1316, 408, 16116, 292, 322, 289, 2548, 29889, 512, 278, 3030, 310, 341, 1475, 705, 11356, 29892, 445, 1033, 367, 1304, 304, 27599, 278, 3829, 310, 278, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 2398, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 739, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 29871, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 450, 6996, 2969, 723, 367, 304, 671, 21531, 20159, 304, 27599, 278, 3829, 310, 278, 341, 1475, 705, 11356, 3748, 7613, 322, 8161, 278, 14354, 310, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 910, 1033, 25135, 15783, 278, 3748, 7613, 408, 263, 25002, 2913, 322, 773, 25002, 15939, 1934, 1316, 408, 3632, 327, 2270, 6471, 322, 350, 9890, 3694, 304, 10115, 2472, 1048, 278, 4423, 310, 29086, 373, 278, 7613, 29889, 2398, 29892, 408, 306, 5276, 1434, 29892, 16049, 445, 723, 1996, 263, 6483, 8004, 310, 
1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 577, 372, 29915, 29879, 451, 1554, 393, 508, 367, 10824, 297, 263, 2560, 1234, 29889, 29871, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1094, 263, 2702, 1342, 29892, 7755, 591, 505, 263, 341, 1475, 705, 11356, 3748, 7613, 411, 278, 1494, 5285, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 797, 445, 1206, 29892, 278, 3748, 7613, 508, 367, 9875, 408, 263, 25002, 2913, 29892, 988, 1269, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29889, 450, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 5293, 21531, 20159, 29892, 591, 508, 27599, 278, 3829, 310, 445, 2913, 322, 8161, 278, 14354, 310, 278, 29086, 2729, 373, 3058, 25002, 15939, 1934, 29889, 13, 13, 2831, 2777, 29892, 697, 982, 304, 437, 445, 338, 304, 10272, 278, 3632, 327, 2270, 6471, 310, 278, 2913, 29889, 15089, 327, 2270, 6471, 526, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 29892, 322, 896, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 512, 278, 1206, 310, 278, 7613, 2038, 29892, 591, 508, 10272, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 29892, 607, 16612, 278, 1353, 322, 376, 12181, 29908, 310, 278, 26532, 297, 278, 2913, 29889, 512, 445, 1206, 29892, 278, 937, 3632, 327, 2270, 2318, 723, 2649, 502, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29889, 13, 13, 29933, 1463, 373, 445, 2472, 29892, 591, 508, 10115, 393, 278, 29086, 297, 278, 3748, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29892, 408, 4318, 2400, 29901, 13, 28956, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 29871, 29896, 29871, 341, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 1577, 259, 29896, 13, 29871, 29896, 29871, 1577, 29871, 1577, 29871, 341, 259, 29896, 13, 29871, 29896, 259, 29896, 259, 29896, 259, 29896, 259, 29896, 13, 28956, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1342, 310, 920, 21531, 20159, 1033, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 526, 1784, 916, 1950, 13501, 29892, 322, 278, 2702, 4902, 310, 920, 304, 2334, 963, 723, 8839, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 1094, 306, 5276, 1434, 29892, 16049, 263, 1650, 763, 445, 723, 1996, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29889, 29871, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 18585, 29889, 17090, 366, 505, 263, 6483, 8004, 310, 1716, 21531, 20159, 322, 278, 3748, 310, 341, 1475, 705, 11356, 29892, 1244, 338, 920, 366, 1795, 671, 21531, 20159, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 
408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 4803, 25002, 15939, 1934, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1670, 526, 1784, 1950, 25002, 15939, 1934, 393, 1033, 367, 1304, 363, 445, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29941, 29889, 4803, 278, 2472, 10115, 1127, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 937, 3632, 327, 2270, 2318, 310, 278, 2913, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 21531, 20159, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 25002, 15939, 1934, 1641, 1304, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 21531, 20159, 508, 367, 1304, 304, 4505, 341, 1475, 705, 11356, 29889, 29871, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 671, 28152, 3632, 3002, 470, 738, 310, 278, 916, 25002, 15939, 1934, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29892, 366, 723, 1101, 278, 1021, 6996, 6576, 408, 5439, 2038, 29892, 541, 411, 777, 12651, 297, 278, 2702, 4902, 29889, 13, 13, 2831, 2777, 29892, 1244, 338, 920, 366, 1795, 671, 28152, 3632, 3002, 304, 4505, 263, 341, 1475, 705, 11356, 3748, 7613, 29901, 13, 13, 29896, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 7806, 6862, 373, 278, 7613, 16161, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 508, 367, 2714, 310, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29892, 607, 526, 3291, 393, 526, 451, 760, 310, 278, 2913, 29889, 13, 29906, 29889, 11796, 29872, 278, 28152, 3632, 3002, 310, 278, 2913, 29889, 9034, 9696, 3632, 3002, 338, 263, 982, 310, 20766, 278, 26532, 297, 263, 2913, 975, 931, 29892, 322, 372, 508, 367, 1304, 304, 10115, 2472, 1048, 278, 14354, 310, 278, 29086, 373, 278, 341, 1475, 705, 11356, 7613, 29889, 13, 29941, 29889, 4803, 278, 2472, 515, 278, 28152, 3632, 3002, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 591, 508, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29946, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 508, 671, 445, 2472, 304, 4772, 963, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 773, 28152, 3632, 3002, 304, 4505, 341, 1475, 705, 11356, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 3748, 7613, 322, 278, 2702, 4902, 310, 278, 28152, 3632, 3002, 16287, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 28152, 3632, 3002, 508, 367, 1304, 304, 
4505, 341, 1475, 705, 11356, 29889, 29871, 2, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 1763, 2334, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29892, 366, 723, 817, 304, 437, 278, 1494, 29901, 13, 13, 29896, 29889, 16052, 263, 5132, 3577, 363, 21531, 20159, 29889, 1670, 526, 3196, 3987, 3625, 29892, 1316, 408, 402, 566, 2918, 29892, 360, 291, 952, 375, 29892, 470, 4560, 7354, 29899, 29873, 1388, 29889, 4525, 9741, 3867, 14009, 322, 848, 12286, 363, 20602, 25002, 15939, 1934, 29892, 1316, 408, 3632, 327, 2270, 6471, 29892, 350, 9890, 3694, 29892, 470, 28152, 3632, 3002, 29889, 13, 29906, 29889, 16314, 278, 341, 1475, 705, 11356, 3748, 7613, 408, 263, 25002, 2913, 29889, 910, 1033, 367, 2309, 773, 263, 848, 3829, 4944, 491, 278, 21531, 20159, 3577, 29892, 1316, 408, 263, 3053, 506, 616, 4280, 470, 263, 478, 2035, 29367, 29899, 29934, 4512, 4280, 29889, 7806, 6862, 373, 278, 7613, 723, 3928, 304, 263, 1298, 297, 278, 2913, 29892, 322, 278, 29086, 373, 278, 7613, 723, 367, 9875, 408, 376, 5391, 267, 29908, 297, 278, 2913, 29889, 13, 29941, 29889, 11796, 29872, 278, 25002, 15939, 1934, 310, 278, 2913, 29889, 910, 1033, 367, 2309, 773, 14009, 4944, 491, 278, 21531, 20159, 3577, 29889, 1152, 1342, 29892, 366, 1033, 10272, 278, 28152, 3632, 3002, 310, 278, 2913, 773, 278, 28152, 3632, 3002, 5687, 29889, 13, 29946, 29889, 4803, 278, 2472, 515, 278, 25002, 15939, 1934, 304, 8161, 278, 14354, 310, 278, 29086, 373, 278, 7613, 29889, 910, 1033, 367, 2309, 773, 263, 10296, 310, 19475, 24481, 322, 8720, 29889, 1152, 1342, 29892, 565, 278, 28152, 3632, 3002, 14088, 393, 727, 526, 3023, 26532, 297, 278, 2913, 29892, 21050, 297, 263, 6862, 8267, 29892, 769, 366, 1033, 671, 263, 8424, 310, 775, 304, 10115, 393, 278, 29086, 526, 5982, 297, 278, 3023, 26995, 310, 278, 7613, 29889, 13, 29945, 29889, 4803, 445, 2472, 304, 4505, 278, 341, 1475, 705, 11356, 3748, 29889, 9038, 366, 1073, 278, 14354, 310, 278, 29086, 29892, 366, 1033, 671, 263, 8424, 310, 775, 304, 6336, 2828, 373, 278, 9109, 25256, 322, 2821, 278, 7613, 29889, 13, 13, 2776, 3236, 29892, 445, 338, 925, 697, 1950, 2948, 304, 16049, 263, 341, 1475, 705, 11356, 899, 369, 393, 3913, 21531, 20159, 297, 5132, 29889, 1670, 1122, 367, 916, 5837, 304, 437, 372, 29892, 8679, 373, 278, 2702, 4902, 310, 278, 5314, 29889, 2398, 29892, 445, 881, 2367, 366, 263, 2498, 2969, 310, 920, 304, 679, 4687, 411, 1316, 263, 2060, 29889, 29871, 2, 1, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100], "attention_mask": [true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, 
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, 
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, 
true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, true, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, 
false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false]} diff --git a/tests/monkeypatch/test_llama_attn_hijack_flash.py b/tests/monkeypatch/test_llama_attn_hijack_flash.py new file mode 100644 index 0000000000000000000000000000000000000000..4521cd07bc39e6bf8c3486ea6fdf6af258b8f3cb --- /dev/null +++ b/tests/monkeypatch/test_llama_attn_hijack_flash.py @@ -0,0 +1,113 @@ +""" +Unit tests for the monkeypatch utils +""" +import unittest + +import torch + +from axolotl.monkeypatch.utils import ( + get_cu_seqlens, + get_cu_seqlens_from_pos_ids, + get_max_seqlen_in_batch, + get_unpad_data, +) + + +class TestMonkeyPatchUtils(unittest.TestCase): + """ + Unit test class for monkeypatch utils + """ + + def test_get_cu_seqlens_1d(self): + attn_mask = torch.tensor([[1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 0, 0]]) + target_res = torch.tensor([0, 4, 7, 12, 14, 16], dtype=torch.int32) + self.assertTrue(torch.allclose(get_cu_seqlens(attn_mask)[0], target_res)) + + def test_get_cu_seqlens_from_pos_ids_1d(self): + position_ids = torch.tensor([[0, 1, 2, 3, 0, 1, 2, 0, 1, 2, 3, 4, 0, 1, 0, 0]]) + target_res = torch.tensor([0, 4, 7, 12, 14, 16], dtype=torch.int32) + self.assertTrue( + torch.allclose(get_cu_seqlens_from_pos_ids(position_ids)[0], target_res) + ) + + def 
test_get_cu_seqlens_from_pos_ids_2d(self): + position_ids = torch.tensor( + [ + [0, 1, 2, 3, 0, 1, 2, 0, 1, 2, 3, 4, 0, 1, 0, 0], + [0, 1, 2, 3, 4, 0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 0], + ] + ) + target_res = torch.tensor( + [[0, 4, 7, 12, 14, 16], [0, 5, 8, 15, 16, 16]], dtype=torch.int32 + ) + self.assertTrue( + torch.allclose(get_cu_seqlens_from_pos_ids(position_ids)[0], target_res) + ) + + def test_get_max_seqlen_in_batch(self): + attn_mask = torch.tensor([[1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 0, 0]]) + target_res = torch.tensor([4, 3, 5, 2], dtype=torch.int32) + self.assertTrue(torch.allclose(get_max_seqlen_in_batch(attn_mask), target_res)) + + def test_get_unpad_data(self): + attn_mask = torch.tensor([[1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 0, 0]]) + target_indices = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]) + target_cu_seqlen = torch.tensor([0, 4, 7, 12, 14], dtype=torch.int32) + target_max_seqlen_in_batch = 5 + indices, cu_seqlen, max_seqlen_in_batch = get_unpad_data(attn_mask) + self.assertTrue(torch.allclose(target_indices, indices)) + self.assertTrue(torch.allclose(target_cu_seqlen, cu_seqlen)) + self.assertEqual(target_max_seqlen_in_batch, max_seqlen_in_batch) + + attn_mask = torch.tensor( + [ + [1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 0, 0], + [1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5], + ] + ) + target_indices = torch.tensor( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + ] + ) + target_cu_seqlen = torch.tensor( + [0, 4, 7, 12, 14, 17, 22, 24, 27, 30], dtype=torch.int32 + ) + target_max_seqlen_in_batch = 5 + indices, cu_seqlen, max_seqlen_in_batch = get_unpad_data(attn_mask) + self.assertTrue(torch.allclose(target_indices, indices)) + self.assertTrue(torch.allclose(target_cu_seqlen, cu_seqlen)) + self.assertEqual(target_max_seqlen_in_batch, max_seqlen_in_batch) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/prompt_strategies/test_alpaca.py b/tests/prompt_strategies/test_alpaca.py new file mode 100644 index 0000000000000000000000000000000000000000..9c97e4052134f325e5c41dd5d823f214c33c5c0d --- /dev/null +++ b/tests/prompt_strategies/test_alpaca.py @@ -0,0 +1,116 @@ +""" +Test module for alpaca integration w chatml +""" +import pytest +from datasets import Dataset +from tokenizers import AddedToken +from transformers import AutoTokenizer + +from axolotl.datasets import TokenizedPromptDataset +from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter, PromptStyle + + +@pytest.fixture(name="alpaca_dataset") +def fixture_alpaca_dataset(): + return Dataset.from_list( + [ + { + "instruction": "Evaluate this sentence for spelling and grammar mistakes", + "input": "He finnished his meal and left the resturant", + "output": "He finished his meal and left the restaurant.", + } + ] + ) + + +@pytest.fixture(name="tokenizer") +def fixture_tokenizer(): + # pylint: disable=all + tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") + tokenizer.add_special_tokens( + { + "eos_token": AddedToken( + "<|im_end|>", rstrip=False, lstrip=False, normalized=False + ) + } + ) + tokenizer.add_tokens( + [ + AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False), + ] + ) + + return tokenizer + + +class TestAlpacaChatml: + """ + Test class for alpaca prompter + """ + + def test_no_double_im_end(self, alpaca_dataset, tokenizer): + strategy 
= AlpacaPromptTokenizingStrategy( + AlpacaPrompter(prompt_style=PromptStyle.CHATML.value), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, alpaca_dataset, process_count=1 + ) + + input_ids = dataset_wrapper[0]["input_ids"] + # fmt: off + assert input_ids == [ + 1, # Bos + 32001, 1587, 13, 20548, 336, 349, 396, 13126, 369, 13966, 264, 3638, 28725, 5881, 1360, 395, 396, 2787, 369, 5312, 3629, 2758, 28723, 12018, 264, 2899, 369, 6582, 1999, 2691, 274, 272, 2159, 28723, 32000, 28705, 13, # instruction + 32001, 2188, 13, 16627, 11931, 456, 12271, 354, 668, 3572, 304, 18756, 3479, 17179, 13, 2428, 854, 28711, 1497, 516, 11314, 304, 1749, 272, 1846, 324, 440, 32000, 28705, 13, # input + 32001, 13892, 13, 650, 5967, 516, 11314, 304, 1749, 272, 9926, 28723, 32000, # output + ] + # fmt: on + + def test_no_train_on_input(self, alpaca_dataset, tokenizer): + strategy = AlpacaPromptTokenizingStrategy( + AlpacaPrompter(prompt_style=PromptStyle.CHATML.value), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, alpaca_dataset, process_count=1 + ) + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + -100, # bos + -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, # instruction + -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, # input + -100, -100, -100, 650, 5967, 516, 11314, 304, 1749, 272, 9926, 28723, 32000, # Output + ] + # fmt: on + + def test_w_train_on_input(self, alpaca_dataset, tokenizer): + strategy = AlpacaPromptTokenizingStrategy( + AlpacaPrompter(prompt_style=PromptStyle.CHATML.value), + tokenizer, + True, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, alpaca_dataset, process_count=1 + ) + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + 1, # Bos + 32001, 1587, 13, 20548, 336, 349, 396, 13126, 369, 13966, 264, 3638, 28725, 5881, 1360, 395, 396, 2787, 369, 5312, 3629, 2758, 28723, 12018, 264, 2899, 369, 6582, 1999, 2691, 274, 272, 2159, 28723, 32000, 28705, 13, # instruction + 32001, 2188, 13, 16627, 11931, 456, 12271, 354, 668, 3572, 304, 18756, 3479, 17179, 13, 2428, 854, 28711, 1497, 516, 11314, 304, 1749, 272, 1846, 324, 440, 32000, 28705, 13, # input + 32001, 13892, 13, 650, 5967, 516, 11314, 304, 1749, 272, 9926, 28723, 32000, # output + ] + # fmt: on diff --git a/tests/prompt_strategies/test_raw_io.py b/tests/prompt_strategies/test_raw_io.py new file mode 100644 index 0000000000000000000000000000000000000000..8c66538ecf741ec53a09606af1b1b89d50a50099 --- /dev/null +++ b/tests/prompt_strategies/test_raw_io.py @@ -0,0 +1,116 @@ +""" +Test module for raw i/o data for prompts +""" +import pytest +from datasets import Dataset +from tokenizers import AddedToken +from transformers import AutoTokenizer + +from axolotl.datasets import TokenizedPromptDataset +from axolotl.prompt_strategies.input_output import ( + RawInputOutputPrompter, + RawInputOutputStrategy, +) + + +@pytest.fixture(name="segments_dataset") +def fixture_sharegpt_dataset(): + return Dataset.from_list( + [ + { + "segments": [ + { + "label": False, + "text": "hello ", + }, + { + 
"label": True, + "text": "hi there.", + }, + { + "label": False, + "text": "goodbye ", + }, + { + "label": True, + "text": "farewell", + }, + ] + } + ] + ) + + +@pytest.fixture(name="tokenizer") +def fixture_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") + tokenizer.add_tokens( + [ + AddedToken("", rstrip=False, lstrip=False, normalized=False), + ] + ) + + return tokenizer + + +class TestRawInputOutputPrompts: + """ + Test class for raw i/o prompter + """ + + def test_segment_prompts(self, segments_dataset, tokenizer): + strategy = RawInputOutputStrategy( + RawInputOutputPrompter(), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, segments_dataset, process_count=1 + ) + + input_ids = dataset_wrapper[0]["input_ids"] + labels = dataset_wrapper[0]["labels"] + + assert ( + tokenizer.decode(input_ids) + == " hello hi there. goodbye farewell" + ) + # fmt: off + assert input_ids == [ + 1, # + 6312, # hell + 28709, # o + 28705, # + 12014, # hi + 736, # there + 28723, # . + 32000, # + 1179, # good + 17664, # bye + 28705, # + 19111, # fare + 5458, # well + 32000, # + ] + # fmt: on + + # fmt: off + assert labels == [ + -100, # + -100, # hell + -100, # o + -100, # + 12014, # hi + 736, # there + 28723, # . + 32000, # + -100, # good + -100, # bye + -100, # + 19111, # fare + 5458, # well + 32000, # + ] + # fmt: on diff --git a/tests/prompt_strategies/test_sharegpt.py b/tests/prompt_strategies/test_sharegpt.py new file mode 100644 index 0000000000000000000000000000000000000000..19d63eac83652e00acef3d34bc1b517ce5a04ffa --- /dev/null +++ b/tests/prompt_strategies/test_sharegpt.py @@ -0,0 +1,266 @@ +""" +Test module for sharegpt integration w chatml +""" + +import pytest +from datasets import Dataset +from tokenizers import AddedToken +from transformers import AutoTokenizer + +from axolotl.datasets import TokenizedPromptDataset +from axolotl.prompt_strategies.sharegpt import ( + GlaiveShareGPTPromptTokenizingStrategy, + SimpleShareGPTPromptTokenizingStrategy, + register_chatml_template, +) +from axolotl.prompters import ShareGPTPrompterV2 + +register_chatml_template() + + +@pytest.fixture(name="sharegpt_dataset") +def fixture_sharegpt_dataset(): + return Dataset.from_list( + [ + { + "conversations": [ + { + "from": "system", + "value": "repeat", + }, + { + "from": "human", + "value": "hello", + }, + { + "from": "gpt", + "value": "hello", + }, + { + "from": "human", + "value": "goodbye", + }, + { + "from": "gpt", + "value": "goodbye", + }, + ] + } + ] + ) + + +@pytest.fixture(name="glaive_dataset") +def fixture_sharegpt_glaive_dataset(): + return Dataset.from_list( + [ + { + "system": "SYSTEM: This is a system prompt", + "chat": "USER: Can you book a flight for me from New York to London? ASSISTANT: I'm sorry, but I don't have the capability to book flights. 
<|endoftext|>", + } + ] + ) + + +@pytest.fixture(name="multi_role_dataset") +def fixture_multi_role_dataset(): + return Dataset.from_list( + [ + { + "conversations": [ + { + "from": "system", + "value": "use get_weather(city) to get the weather for a city", + }, + { + "from": "human", + "value": "hello, what's the weather in New York?", + }, + { + "from": "gpt", + "value": "let me get that for you", + }, + { + "from": "tool", + "value": "get_weather(New York)", + }, + { + "from": "gpt", + "value": "the weather in New York is 70 degrees and sunny", + }, + ] + } + ] + ) + + +@pytest.fixture(name="tokenizer") +def fixture_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") + tokenizer.add_special_tokens( + { + "eos_token": AddedToken( + "<|im_end|>", rstrip=False, lstrip=False, normalized=False + ) + } + ) + tokenizer.add_tokens( + [ + AddedToken("<|im_start|>", rstrip=False, lstrip=False, normalized=False), + ] + ) + + return tokenizer + + +class TestSharegpt: + """ + Test class for sharegpt prompter + """ + + def test_no_double_im_end(self, sharegpt_dataset, tokenizer): + strategy = SimpleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation="chatml", + role_key_model=None, + role_key_human=None, + ), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, sharegpt_dataset, process_count=1 + ) + + input_ids = dataset_wrapper[0]["input_ids"] + # fmt: off + assert input_ids == [ + # 28705, 13, is " \n" + 1, # bos + 32001, 1587, 13, 25997, 32000, 28705, 13, # system + 32001, 2188, 13, 21558, 32000, 28705, 13, # human + 32001, 13892, 13, 21558, 32000, 28705, 13, # gpt + 32001, 2188, 13, 12684, 17664, 32000, 28705, 13, # human + 32001, 13892, 13, 12684, 17664, 32000, 28705, 13, # gpt + ] + # fmt: on + + def test_w_train_on_input(self, sharegpt_dataset, tokenizer): + strategy = SimpleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation="chatml", + role_key_model=None, + role_key_human=None, + ), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, sharegpt_dataset, process_count=1 + ) + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + -100, # bos + -100, -100, -100, -100, -100, -100, -100, # system + -100, -100, -100, -100, -100, -100, -100, # human + -100, -100, 13, 21558, 32000, 28705, 13, # gpt + -100, -100, -100, -100, -100, -100, -100, -100, # human + -100, -100, 13, 12684, 17664, 32000, 28705, 13, # gpt + ] + # fmt: on + + def test_no_train_on_input(self, sharegpt_dataset, tokenizer): + strategy = SimpleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation="chatml", + role_key_model=None, + role_key_human=None, + ), + tokenizer, + True, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, sharegpt_dataset, process_count=1 + ) + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + 1, # bos + 32001, 1587, 13, 25997, 32000, 28705, 13, # system + 32001, 2188, 13, 21558, 32000, 28705, 13, # human + 32001, 13892, 13, 21558, 32000, 28705, 13, # gpt + 32001, 2188, 13, 12684, 17664, 32000, 28705, 13, # human + 32001, 13892, 13, 12684, 17664, 32000, 28705, 13, # gpt + ] + # fmt: on + + def test_chatml_glaive(self, glaive_dataset, tokenizer): + strategy = GlaiveShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2( + conversation="chatml", + role_key_model=None, + 
role_key_human=None, + ), + tokenizer, + True, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, glaive_dataset, process_count=1 + ) + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + 1, # bos + 32001, 1587, 13, 3260, 349, 264, 1587, 11510, 32000, 28705, 13, # system + 32001, 2188, 13, 6325, 368, 1820, 264, 9314, 354, 528, 477, 1450, 2726, 298, 4222, 28804, 32000, 28705, 13, # human + 32001, 13892, 13, 28737, 28742, 28719, 7371, 28725, 562, 315, 949, 28742, 28707, 506, 272, 21368, 298, 1820, 22447, 28723, 28705, 523, 28766, 416, 1009, 772, 28766, 28767, 32000, 28705, 13 # gpt + ] + # fmt: on + + def test_multi_role_dataset(self, multi_role_dataset, tokenizer): + strategy = SimpleShareGPTPromptTokenizingStrategy( + ShareGPTPrompterV2(conversation="chatml", roles={"input": ["tool"]}), + tokenizer, + False, # train_on_inputs + 2048, # sequence_len + ) + + dataset_wrapper = TokenizedPromptDataset( + strategy, multi_role_dataset, process_count=1 + ) + + input_ids = dataset_wrapper[0]["input_ids"] + # fmt: off + assert input_ids == [ + 1, # bos + 32001, 1587, 13, 1730, 625, 28730, 769, 1223, 28732, 18373, 28731, 298, 625, 272, 8086, 354, 264, 2990, 32000, 28705, 13, # system + 32001, 2188, 13, 21558, 28725, 767, 28742, 28713, 272, 8086, 297, 1450, 2726, 28804, 32000, 28705, 13, # human + 32001, 13892, 13, 895, 528, 625, 369, 354, 368, 32000, 28705, 13, # gpt + 32001, 3921, 13, 527, 28730, 769, 1223, 28732, 2972, 2726, 28731, 32000, 28705, 13, # tool + 32001, 13892, 13, 1237, 8086, 297, 1450, 2726, 349, 28705, 28787, 28734, 11182, 304, 4376, 1780, 32000, 28705, 13 # gpt + ] + # fmt: on + + labels = dataset_wrapper[0]["labels"] + # fmt: off + assert labels == [ + -100, # bos + -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, # system + -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, # human + -100, -100, 13, 895, 528, 625, 369, 354, 368, 32000, 28705, 13, # gpt + -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, # tool + -100, -100, 13, 1237, 8086, 297, 1450, 2726, 349, 28705, 28787, 28734, 11182, 304, 4376, 1780, 32000, 28705, 13 # gpt + ] + # fmt: on diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 0000000000000000000000000000000000000000..16af089a063880b75b8ad1451de1349b9ffdee63 --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,64 @@ +""" +test module for the axolotl.utis.data module +""" +import unittest + +from transformers import LlamaTokenizer + +from axolotl.utils.data import encode_pretraining, md5 + + +class TestEncodePretraining(unittest.TestCase): + """ + test class for encode pretraining and md5 helper + """ + + def setUp(self): + self.tokenizer = LlamaTokenizer.from_pretrained("huggyllama/llama-7b") + self.tokenizer.add_special_tokens( + { + "eos_token": "", + "bos_token": "", + "unk_token": "", + "pad_token": "", + } + ) + self.max_tokens = 15 # set a small number for easy inspection + + def test_encode_pretraining(self): + examples = { + "text": [ + "Hello, world!", + "Nice to meet you.", + "lorem ipsum dolor sit amet.", + "Nice to meet you again!.", + "hello, hello", + ] + } + result = encode_pretraining(self.tokenizer, self.max_tokens, examples["text"]) + + self.assertEqual(len(result["input_ids"]), 3) + + # Assert the length of input_ids and attention_mask is correct + 
self.assertEqual(len(result["input_ids"][0]), self.max_tokens) + self.assertEqual(len(result["attention_mask"][0]), self.max_tokens) + + # Assert EOS and PAD tokens are correctly added + # hello world! is 4 tokens + self.assertEqual(result["input_ids"][0][0], self.tokenizer.bos_token_id) + self.assertEqual(result["input_ids"][0][5], self.tokenizer.eos_token_id) + self.assertEqual(result["input_ids"][0][6], self.tokenizer.pad_token_id) + # second part, 5 tokens + self.assertEqual(result["input_ids"][0][7], self.tokenizer.bos_token_id) + self.assertEqual(result["input_ids"][0][13], self.tokenizer.eos_token_id) + self.assertEqual(result["input_ids"][0][14], self.tokenizer.pad_token_id) + + def test_md5(self): + self.assertEqual(md5("hello world"), "5eb63bbbe01eeed093cb22bb8f5acdc3") + self.assertEqual( + md5("hello world", "utf-8"), "5eb63bbbe01eeed093cb22bb8f5acdc3" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_dict.py b/tests/test_dict.py new file mode 100644 index 0000000000000000000000000000000000000000..2007cb085e17226249e60c6994f147b3cbc767ca --- /dev/null +++ b/tests/test_dict.py @@ -0,0 +1,104 @@ +"""Module for testing DictDefault class""" + + +import unittest + +import pytest + +from axolotl.utils.dict import DictDefault + + +class DictDefaultTest(unittest.TestCase): + """ + Test DictDefault class + """ + + def test_dict_default(self): + cfg = DictDefault( + { + "key_a": {"key_b": "value_a"}, + "key_c": "value_c", + "key_d": ["value_d", "value_e"], + } + ) + + assert ( + cfg.key_a.key_b == "value_a" + ), "DictDefault should return value for existing nested keys" + + assert ( + cfg.key_c == "value_c" + ), "DictDefault should return value for existing keys" + + assert ( + cfg.key_d[0] == "value_d" + ), "DictDefault should return value for existing keys in list" + + assert ( + "value_e" in cfg.key_d + ), "DictDefault should support in operator for existing keys in list" + + def test_dict_or_operator(self): + cfg = DictDefault({"key_a": {"key_b": "value_b"}, "key_f": "value_g"}) + + cfg = cfg | DictDefault( # pylint: disable=unsupported-binary-operation + { + "key_a": {"key_b": "value_a"}, + "key_c": "value_c", + "key_d": ["value_d", "value_e"], + "key_f": "value_f", + } + ) + + assert ( + cfg.key_a.key_b == "value_b" + ), "DictDefault should support OR operator for existing nested keys" + + assert cfg.key_c == "value_c", "DictDefault should not delete existing key" + + assert cfg.key_d == [ + "value_d", + "value_e", + ], "DictDefault should not overwrite existing keys in list" + + assert ( + cfg.key_f == "value_g" + ), "DictDefault should support OR operator for existing key" + + def test_dict_missingkey(self): + cfg = DictDefault({}) + + assert cfg.random_key is None, "DictDefault should return None for missing keys" + + def test_dict_or(self): + cfg = DictDefault({}) | DictDefault({}) + + assert ( + cfg.random_key is None + ), "DictDefault should return None for missing keys after | operation" + + def test_dict_nested_missingparentkey(self): + """ + Due to subclassing Dict, DictDefault will error if we try to access a nested key whose parent key does not exist. + """ + cfg = DictDefault({}) + + with pytest.raises( + AttributeError, + match=r"'NoneType' object has no attribute 'another_random_key'", + ): + cfg.random_key.another_random_key = "value" + + def test_dict_shorthand_assignment(self): + """ + Shorthand assignment is said to not be supported if subclassed. However, their example raises error instead of None. 
+ This test ensures that it is supported for current implementation. + + Ref: https://github.com/mewwts/addict#default-values + """ + + cfg = DictDefault({"key_a": {"key_b": "value_a"}}) + + cfg.key_a.key_b = "value_b" + + assert cfg.key_a.key_b == "value_b", "Shorthand assignment should be supported" diff --git a/tests/test_expand_mask.py b/tests/test_expand_mask.py new file mode 100644 index 0000000000000000000000000000000000000000..01241c2958b14460cb5cd3ba91671fc05759dda3 --- /dev/null +++ b/tests/test_expand_mask.py @@ -0,0 +1,44 @@ +""" +Unit tests for the monkey patch for expand mask to handle packed sequences +""" +import unittest + +import torch + +from axolotl.monkeypatch.llama_expand_mask import _expand_mask + + +class TestExpandMask(unittest.TestCase): + """ + Test class for attention mask expansion for packed sequences + """ + + def test_output(self): + mask = torch.tensor([[1, 1, 1, 2], [2, 3, 3, 0]]) + dtype = torch.float32 + expected_output = torch.tensor( + [ + [ + [ + [0.0000e00, -3.4028e38, -3.4028e38, -3.4028e38], + [0.0000e00, 0.0000e00, -3.4028e38, -3.4028e38], + [0.0000e00, 0.0000e00, 0.0000e00, -3.4028e38], + [-3.4028e38, -3.4028e38, -3.4028e38, 0.0000e00], + ] + ], + [ + [ + [0.0000e00, -3.4028e38, -3.4028e38, -3.4028e38], + [-3.4028e38, 0.0000e00, -3.4028e38, -3.4028e38], + [-3.4028e38, 0.0000e00, 0.0000e00, -3.4028e38], + [-3.4028e38, -3.4028e38, -3.4028e38, -3.4028e38], + ] + ], + ] + ) + # Check that the output matches the expected output + self.assertTrue(torch.allclose(_expand_mask(mask, dtype), expected_output)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_freeze.py b/tests/test_freeze.py new file mode 100644 index 0000000000000000000000000000000000000000..49d30ba5face906bb152667a85e2b4a40e687d0a --- /dev/null +++ b/tests/test_freeze.py @@ -0,0 +1,285 @@ +""" +This module contains unit tests for the `freeze_layers_except` function. + +The `freeze_layers_except` function is used to freeze layers in a model, except for the specified layers. +The unit tests in this module verify the behavior of the `freeze_layers_except` function in different scenarios. +""" + +import unittest + +import torch +from torch import nn + +from axolotl.utils.freeze import freeze_layers_except + +ZERO = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] +ONE_TO_TEN = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0] + + +class TestFreezeLayersExcept(unittest.TestCase): + """ + A test case class for the `freeze_layers_except` function. + """ + + def setUp(self): + self.model = _TestModel() + + def test_freeze_layers_with_dots_in_name(self): + freeze_layers_except(self.model, ["features.layer"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + def test_freeze_layers_without_dots_in_name(self): + freeze_layers_except(self.model, ["classifier"]) + self.assertFalse( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertTrue( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + def test_freeze_layers_regex_patterns(self): + # The second pattern cannot match because only characters 'a' to 'c' are allowed after the word 'class', whereas it should be matching the character 'i'. 
+ freeze_layers_except(self.model, [r"^features.[a-z]+.weight$", r"class[a-c]+"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + def test_all_layers_frozen(self): + freeze_layers_except(self.model, []) + self.assertFalse( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be frozen.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + def test_all_layers_unfrozen(self): + freeze_layers_except(self.model, ["features.layer", "classifier"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertTrue( + self.model.classifier.weight.requires_grad, + "model.classifier should be trainable.", + ) + + def test_freeze_layers_with_range_pattern_start_end(self): + freeze_layers_except(self.model, ["features.layer[1:5]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ZERO, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ZERO, + ZERO, + ZERO, + ZERO, + ZERO, + ] + ) + + def test_freeze_layers_with_range_pattern_single_index(self): + freeze_layers_except(self.model, ["features.layer[5]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ZERO, ZERO, ZERO, ZERO, ZERO, ONE_TO_TEN, ZERO, ZERO, ZERO, ZERO] + ) + + def test_freeze_layers_with_range_pattern_start_omitted(self): + freeze_layers_except(self.model, ["features.layer[:5]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ZERO, + ZERO, + ZERO, + ZERO, + ZERO, + ] + ) + + def test_freeze_layers_with_range_pattern_end_omitted(self): + freeze_layers_except(self.model, ["features.layer[4:]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ZERO, + ZERO, + ZERO, + ZERO, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ] + ) + + def test_freeze_layers_with_range_pattern_merge_included(self): + freeze_layers_except(self.model, ["features.layer[4:]", "features.layer[5:6]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ZERO, + ZERO, + ZERO, + ZERO, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ] + ) + + def 
test_freeze_layers_with_range_pattern_merge_intersect(self): + freeze_layers_except(self.model, ["features.layer[4:7]", "features.layer[6:8]"]) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ZERO, + ZERO, + ZERO, + ZERO, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ONE_TO_TEN, + ZERO, + ZERO, + ] + ) + + def test_freeze_layers_with_range_pattern_merge_separate(self): + freeze_layers_except( + self.model, + ["features.layer[1:2]", "features.layer[3:4]", "features.layer[5:6]"], + ) + self.assertTrue( + self.model.features.layer.weight.requires_grad, + "model.features.layer should be trainable.", + ) + self.assertFalse( + self.model.classifier.weight.requires_grad, + "model.classifier should be frozen.", + ) + + self._assert_gradient_output( + [ + ZERO, + ONE_TO_TEN, + ZERO, + ONE_TO_TEN, + ZERO, + ONE_TO_TEN, + ZERO, + ZERO, + ZERO, + ZERO, + ] + ) + + def _assert_gradient_output(self, expected): + input_tensor = torch.tensor([ONE_TO_TEN], dtype=torch.float32) + + self.model.features.layer.weight.grad = None # Reset gradients + output = self.model.features.layer(input_tensor) + loss = output.sum() + loss.backward() + + expected_grads = torch.tensor(expected) + torch.testing.assert_close( + self.model.features.layer.weight.grad, expected_grads + ) + + +class _SubLayerModule(nn.Module): + def __init__(self): + super().__init__() + self.layer = nn.Linear(10, 10) + + +class _TestModel(nn.Module): + def __init__(self): + super().__init__() + self.features = _SubLayerModule() + self.classifier = nn.Linear(10, 2) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_normalize_config.py b/tests/test_normalize_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2e76ceb45d336c961b2e886c77b7b14df53a3394 --- /dev/null +++ b/tests/test_normalize_config.py @@ -0,0 +1,91 @@ +""" +Test classes for checking functionality of the cfg normalization +""" +import unittest +from unittest.mock import patch + +from axolotl.utils.config import normalize_cfg_datasets, normalize_config +from axolotl.utils.dict import DictDefault + + +class NormalizeConfigTestCase(unittest.TestCase): + """ + test class for normalize_config checks + """ + + def _get_base_cfg(self): + return DictDefault( + { + "base_model": "JackFram/llama-68m", + "base_model_config": "JackFram/llama-68m", + "tokenizer_type": "LlamaTokenizer", + "num_epochs": 1, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + } + ) + + def test_base_model_config_set_when_empty(self): + cfg = self._get_base_cfg() + del cfg.base_model_config + normalize_config(cfg) + + assert cfg.base_model_config == cfg.base_model + + def test_chat_template_chatml(self): + cfg = DictDefault( + { + "chat_template": "chatml", + "datasets": [ + { + "path": "lorem/ipsum", + "type": "sharegpt", + "conversation": "vicuna_v1.1", + }, + { + "path": "sit/amet", + "type": "sharegpt", + }, + ], + } + ) + + normalize_cfg_datasets(cfg) + + assert cfg.datasets[0].conversation == "vicuna_v1.1" + assert cfg.datasets[1].conversation == "chatml" + + @patch("axolotl.utils.config.is_torch_bf16_gpu_available") + def test_bf16_auto_setter_available(self, mock_bf16_avail): + cfg = self._get_base_cfg() + cfg.bf16 = "auto" + mock_bf16_avail.return_value = True + + normalize_config(cfg) + + self.assertTrue(cfg.bf16) + 
self.assertFalse(cfg.fp16) + + @patch("axolotl.utils.config.is_torch_bf16_gpu_available") + def test_bf16_auto_setter_not_available(self, mock_bf16_avail): + cfg = self._get_base_cfg() + cfg.bf16 = "auto" + cfg.fp16 = None + mock_bf16_avail.return_value = False + + normalize_config(cfg) + + self.assertFalse(cfg.bf16) + self.assertTrue(cfg.fp16) + + @patch("axolotl.utils.config.is_torch_bf16_gpu_available") + def test_bf16_disables_fp16(self, mock_bf16_avail): + cfg = self._get_base_cfg() + cfg.bf16 = True + cfg.fp16 = False + mock_bf16_avail.return_value = True + + normalize_config(cfg) + + self.assertTrue(cfg.bf16) + self.assertFalse(cfg.fp16) diff --git a/tests/test_packed_batch_sampler.py b/tests/test_packed_batch_sampler.py new file mode 100644 index 0000000000000000000000000000000000000000..50f39d60f5f0a09a05796ba87592dc5f426b4446 --- /dev/null +++ b/tests/test_packed_batch_sampler.py @@ -0,0 +1,99 @@ +"""Module for testing streaming dataset sequence packing""" +import pytest +from datasets import concatenate_datasets, load_dataset +from torch.utils.data import DataLoader, RandomSampler +from transformers import AutoTokenizer + +from axolotl.datasets import TokenizedPromptDataset +from axolotl.prompt_strategies.completion import load +from axolotl.utils.collators import V2BatchSamplerDataCollatorForSeq2Seq +from axolotl.utils.dict import DictDefault +from axolotl.utils.samplers import MultipackBatchSampler, get_dataset_lengths + + +@pytest.fixture(name="tokenizer") +def fixture_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b") + tokenizer.pad_token = "" + return tokenizer + + +@pytest.fixture(name="max_seq_length") +def fixture_max_seq_length(): + return 4096 + + +class TestBatchedSamplerPacking: + """ + Test class for packing streaming dataset sequences + """ + + @pytest.mark.parametrize( + "batch_size, num_workers", + [ + (1, 0), + (2, 0), + (1, 2), + (2, 2), + ], + ) + def test_packing(self, batch_size, num_workers, tokenizer, max_seq_length): + import axolotl.monkeypatch.data.batch_dataset_fetcher # pylint: disable=unused-import # noqa: F401 + + dataset = load_dataset( + "Trelis/tiny-shakespeare", + split="train", + ) + + cfg = DictDefault( + { + "train_on_inputs": True, + "sequence_len": max_seq_length, + } + ) + ds_cfg = DictDefault( + { + "field": "Text", + } + ) + completion_strategy = load(tokenizer, cfg, ds_cfg) + dataset_wrapper = TokenizedPromptDataset( + completion_strategy, + dataset, + ) + train_dataset = concatenate_datasets([dataset_wrapper]) + batch_sampler = MultipackBatchSampler( + sampler=RandomSampler(train_dataset), + batch_size=batch_size, + drop_last=True, + batch_max_len=max_seq_length, + lengths=get_dataset_lengths(train_dataset), + ) + + loader = DataLoader( + train_dataset, + batch_sampler=batch_sampler, + collate_fn=V2BatchSamplerDataCollatorForSeq2Seq( # pylint: disable=unexpected-keyword-arg + tokenizer=tokenizer, + padding=True, + pad_to_multiple_of=max_seq_length, + return_tensors="pt", + ), + num_workers=num_workers, + ) + inputs = next(iter(loader)) + + assert inputs["input_ids"].shape == (batch_size, max_seq_length) + assert inputs["labels"].shape == (batch_size, max_seq_length) + assert inputs["attention_mask"].shape == (batch_size, max_seq_length) + + assert inputs["input_ids"].tolist()[0][0] == 2 + assert inputs["labels"].tolist()[0][0] == -100 + assert inputs["attention_mask"].tolist()[0][0] == 0 + assert inputs["attention_mask"].tolist()[0][-1] > 1 + + if batch_size >= 2: + assert 
inputs["input_ids"].tolist()[1][0] == 2 + assert inputs["labels"].tolist()[1][0] == -100 + assert inputs["attention_mask"].tolist()[1][0] == 0 + assert inputs["attention_mask"].tolist()[1][-1] > 1 diff --git a/tests/test_packed_dataset.py b/tests/test_packed_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..da8fb7a937c37b77ac4acf034397cf49ef4bb0dc --- /dev/null +++ b/tests/test_packed_dataset.py @@ -0,0 +1,69 @@ +"""Module for testing dataset sequence packing""" + +import unittest +from pathlib import Path + +from datasets import Dataset, load_dataset +from transformers import AutoTokenizer + +from axolotl.datasets import ConstantLengthDataset, TokenizedPromptDataset +from axolotl.prompt_tokenizers import AlpacaPromptTokenizingStrategy +from axolotl.prompters import AlpacaPrompter + + +class TestPacking(unittest.TestCase): + """ + Test class for packing dataset sequences + """ + + def setUp(self) -> None: + # pylint: disable=duplicate-code + self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b") + self.tokenizer.add_special_tokens( + { + "bos_token": "", + "eos_token": "", + "unk_token": "", + } + ) + + def test_increments_attention(self): + prompter = AlpacaPrompter("chat") + strat = AlpacaPromptTokenizingStrategy( + prompter, + self.tokenizer, + False, + 2048, + ) + dateset = load_dataset( + "json", + data_files=str(Path(__file__).parent / "fixtures/alpaca/alpaca.json"), + )["train"] + dataset = Dataset.from_list(list(TokenizedPromptDataset(strat, dateset))) + + constant_len_dataset = ConstantLengthDataset( + self.tokenizer, + [dataset], + seq_length=2048, + ) + packed_dataset = Dataset.from_list(list(constant_len_dataset)) + example = packed_dataset[0] + next_bos_index = ( + example["input_ids"][1:].index(self.tokenizer.bos_token_id) + 1 + ) # add one since we sliced + + # first example doesn't have mask reset + assert example["input_ids"][0] == self.tokenizer.bos_token_id + assert example["attention_mask"][0] == 1 + assert example["position_ids"][0] == 0 + assert example["position_ids"][1] == 1 + + # but subsequent one does + assert example["input_ids"][next_bos_index] == self.tokenizer.bos_token_id + assert example["attention_mask"][next_bos_index] == 2 + assert example["position_ids"][next_bos_index] == 0 + assert example["position_ids"][next_bos_index + 1] == 1 + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_packed_pretraining.py b/tests/test_packed_pretraining.py new file mode 100644 index 0000000000000000000000000000000000000000..528f9c8074842a31126e446bb8ec642b3033a9e1 --- /dev/null +++ b/tests/test_packed_pretraining.py @@ -0,0 +1,93 @@ +"""Module for testing streaming dataset sequence packing""" +import functools +import unittest + +import torch +from datasets import load_dataset +from torch.utils.data import DataLoader +from transformers import AutoTokenizer + +from axolotl.utils.data import get_dataset_wrapper, wrap_pretraining_dataset +from axolotl.utils.dict import DictDefault + + +class TestPretrainingPacking(unittest.TestCase): + """ + Test class for packing streaming dataset sequences + """ + + def setUp(self) -> None: + # pylint: disable=duplicate-code + self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b") + self.tokenizer.pad_token = "" + + def test_packing_stream_dataset(self): + # pylint: disable=duplicate-code + dataset = load_dataset( + "c4", + "en", + streaming=True, + )["train"] + + cfg = DictDefault( + { + "pretraining_dataset": [ + { + "path": "c4", + "name": "en", + "type": 
"pretrain", + } + ], + "sample_packing": True, + "pad_to_sequence_len": True, + "sequence_len": 2048, + "micro_batch_size": 2, + } + ) + + ds_wrapper_partial = functools.partial( + get_dataset_wrapper, + cfg.pretraining_dataset[0], + self.tokenizer, + cfg, + cfg.pretraining_dataset[0]["type"] or "pretrain", + ) + + original_bsz = cfg.micro_batch_size + train_dataset = wrap_pretraining_dataset( + dataset, + self.tokenizer, + cfg, + ds_wrapper_partial, + max_tokens=cfg.sequence_len, + batch_size=cfg.micro_batch_size, + seed=cfg.seed or 42, + ) + + trainer_loader = DataLoader( + train_dataset, + batch_size=1, + collate_fn=None, + drop_last=True, + ) + idx = 0 + for data in trainer_loader: + if idx > 10: + break + assert data["input_ids"].shape == torch.Size( + [1, original_bsz * cfg.sequence_len] + ) + assert data["position_ids"].shape == torch.Size( + [1, original_bsz * cfg.sequence_len] + ) + assert data["labels"].shape == torch.Size( + [1, original_bsz * cfg.sequence_len] + ) + assert data["attention_mask"].shape == torch.Size( + [1, original_bsz * cfg.sequence_len] + ) + idx += 1 + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_prompt_tokenizers.py b/tests/test_prompt_tokenizers.py new file mode 100644 index 0000000000000000000000000000000000000000..4e659006fe19dc4e44c7f3dce546211d02d93733 --- /dev/null +++ b/tests/test_prompt_tokenizers.py @@ -0,0 +1,505 @@ +"""Module for testing prompt tokenizers.""" + +import json +import logging +import unittest +from copy import deepcopy +from pathlib import Path +from typing import Optional + +import pytest +from datasets import load_dataset +from transformers import AddedToken, AutoTokenizer, LlamaTokenizer + +from axolotl.prompt_strategies.alpaca_chat import NoSystemPrompter +from axolotl.prompt_strategies.alpaca_w_system import ( + InstructionWSystemPromptTokenizingStrategy, + SystemDataPrompter, +) +from axolotl.prompt_strategies.llama2_chat import ( + Llama2ChatPrompter, + LLama2ChatTokenizingStrategy, +) +from axolotl.prompt_strategies.orpo.chat_template import load +from axolotl.prompt_strategies.sharegpt import GlaiveShareGPTPromptTokenizingStrategy +from axolotl.prompt_tokenizers import ( + AlpacaPromptTokenizingStrategy, + ShareGPTPromptTokenizingStrategy, +) +from axolotl.prompters import AlpacaPrompter, PromptStyle, ShareGPTPrompterV2 +from axolotl.utils.dict import DictDefault + +LOG = logging.getLogger("axolotl") + +test_data = { + "multi_turn_sys": { + "conversations": [ + {"from": "system", "value": "lorem"}, + {"from": "human", "value": "abc"}, + {"from": "gpt", "value": "ipsum"}, + {"from": "human", "value": "123"}, + {"from": "gpt", "value": "sit"}, + ] + }, + "single_turn_sys": { + "conversations": [ + {"from": "system", "value": "lorem"}, + {"from": "human", "value": "abc"}, + {"from": "gpt", "value": "ipsum"}, + ] + }, + "single_turn_no_sys": { + "conversations": [ + {"from": "human", "value": "abc"}, + {"from": "gpt", "value": "ipsum"}, + ] + }, + "multi_turn_no_sys": { + "conversations": [ + {"from": "human", "value": "abc"}, + {"from": "gpt", "value": "ipsum"}, + {"from": "human", "value": "123"}, + {"from": "gpt", "value": "sit"}, + ] + }, +} + + +def prompt_strat(conversation, tokenizer): + "Helper function to create a prompt strategy for testing." 
+    prompter = ShareGPTPrompterV2(conversation=conversation)
+    return ShareGPTPromptTokenizingStrategy(
+        prompter,
+        tokenizer,
+        False,
+        2048,
+    )
+
+
+class TestPromptTokenizationStrategies(unittest.TestCase):
+    """
+    Test class for prompt tokenization strategies.
+    """
+
+    _caplog: Optional[pytest.LogCaptureFixture] = None
+
+    @pytest.fixture(autouse=True)
+    def inject_fixtures(self, caplog):
+        self._caplog = caplog
+
+    def setUp(self) -> None:
+        # pylint: disable=duplicate-code
+        self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
+        self.tokenizer.add_special_tokens(
+            {
+                "bos_token": "<s>",
+                "eos_token": "</s>",
+                "unk_token": "<unk>",
+            }
+        )
+
+    def test_sharegpt_integration(self):
+        with open(
+            Path(__file__).parent / "fixtures/conversation.json", encoding="utf-8"
+        ) as fin:
+            data = fin.read()
+            conversation = json.loads(data)
+        with open(
+            Path(__file__).parent / "fixtures/conversation.tokenized.json",
+            encoding="utf-8",
+        ) as fin:
+            data = fin.read()
+            tokenized_conversation = json.loads(data)
+        prompter = ShareGPTPrompterV2()
+        strat = ShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        example = strat.tokenize_prompt(conversation)
+        for fields in ["input_ids", "attention_mask", "labels"]:
+            self.assertEqual(len(example[fields]), len(tokenized_conversation[fields]))
+            self.assertEqual(example[fields], tokenized_conversation[fields])
+
+    def test_sharegpt_warnings_integration(self):
+        with open(
+            Path(__file__).parent / "fixtures/conversation.missingturns.json",
+            encoding="utf-8",
+        ) as fin:
+            data = fin.read()
+            conversation = json.loads(data)
+        prompter = ShareGPTPrompterV2()
+        strat = ShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        with self._caplog.at_level(logging.WARNING):
+            strat.tokenize_prompt(conversation)
+            assert "assistant turn has empty text" in self._caplog.records[1].message
+
+    def test_sharegpt_warnings_turns(self):
+        conversation = {
+            "conversations": [
+                {"from": "system", "value": "lorem"},
+                {"from": "gpt", "value": "ipsum"},
+                {"from": "human", "value": "dolor"},
+                {"from": "human", "value": "dolor"},
+                {"from": "gpt", "value": "sit"},
+            ]
+        }
+        prompter = ShareGPTPrompterV2()
+        strat = ShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        with self._caplog.at_level(logging.WARNING):
+            strat.tokenize_prompt(conversation)
+            assert (
+                "Role did not alternate between turns (gpt and human)"
+                in self._caplog.records[0].message
+            )
+
+    def test_sharegpt_llama(self):
+        "Make sure the sharegpt/llama is tokenized and formatted correctly."
+        strat = prompt_strat("llama-2", self.tokenizer)
+
+        def tokenize(conv):
+            return strat.tokenize_prompt(deepcopy(conv))["input_ids"]
+
+        def decode(ids):
+            return strat.tokenizer.decode(ids)
+
+        # fmt: off
+        # System message, multi-turn conversations
+        mt_ids = tokenize(test_data['multi_turn_sys'])
+        assert decode(mt_ids) == '<s> [INST] <<SYS>>\nlorem\n<</SYS>>\n\nabc [/INST] ipsum</s><s> [INST] 123 [/INST] sit</s>'
+        assert mt_ids == [1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 29880, 3668, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10736, 518, 29914, 25580, 29962, 23421, 2, 1, 518, 25580, 29962, 29871, 29896, 29906, 29941, 518, 29914, 25580, 29962, 7845, 2]
+
+        # System message, single-turn conversations
+        st_ids = tokenize(test_data['single_turn_sys'])
+        assert decode(st_ids) == '<s> [INST] <<SYS>>\nlorem\n<</SYS>>\n\nabc [/INST] ipsum</s>'
+        assert st_ids == [1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 29880, 3668, 13, 29966, 829, 14816, 29903, 6778, 13, 13, 10736, 518, 29914, 25580, 29962, 23421, 2]
+
+        # No system message, single-turn
+        ns_ids = tokenize(test_data['single_turn_no_sys'])
+        assert decode(ns_ids) == '<s> [INST] abc [/INST] ipsum</s>'
+        assert ns_ids == [1, 518, 25580, 29962, 25638, 518, 29914, 25580, 29962, 23421, 2]
+
+        # No system message, multi-turn
+        ns_mt_ids = tokenize(test_data['multi_turn_no_sys'])
+        assert decode(ns_mt_ids) == '<s> [INST] abc [/INST] ipsum</s><s> [INST] 123 [/INST] sit</s>'
+        assert ns_mt_ids == [1, 518, 25580, 29962, 25638, 518, 29914, 25580, 29962, 23421, 2, 1, 518, 25580, 29962, 29871, 29896, 29906, 29941, 518, 29914, 25580, 29962, 7845, 2]
+        # fmt: on
+
+    def test_sharegpt_mistral(self):
+        "Make sure the sharegpt/mistral is tokenized and formatted correctly."
+        strat = prompt_strat("mistral", self.tokenizer)
+
+        def tokenize(conv):
+            return strat.tokenize_prompt(deepcopy(conv))["input_ids"]
+
+        def decode(ids):
+            return strat.tokenizer.decode(ids)
+
+        # fmt: off
+        # System message, multi-turn conversations
+        mt_ids = tokenize(test_data['multi_turn_sys'])
+        assert decode(mt_ids) == '<s> [INST] lorem\nabc [/INST] ipsum</s> [INST] 123 [/INST] sit</s>'
+        assert mt_ids == [1, 518, 25580, 29962, 29871, 301, 3668, 13, 10736, 518, 29914, 25580, 29962, 23421, 2, 518, 25580, 29962, 29871, 29896, 29906, 29941, 518, 29914, 25580, 29962, 7845, 2]
+
+        # System message, single-turn conversations
+        st_ids = tokenize(test_data['single_turn_sys'])
+        assert decode(st_ids) == '<s> [INST] lorem\nabc [/INST] ipsum</s>'
+        assert st_ids == [1, 518, 25580, 29962, 29871, 301, 3668, 13, 10736, 518, 29914, 25580, 29962, 23421, 2]
+
+        # No system message, single-turn
+        ns_ids = tokenize(test_data['single_turn_no_sys'])
+        assert decode(ns_ids) == '<s> [INST] abc [/INST] ipsum</s>'
+        assert ns_ids == [1, 518, 25580, 29962, 25638, 518, 29914, 25580, 29962, 23421, 2]
+
+        # No system message, multi-turn
+        ns_mt_ids = tokenize(test_data['multi_turn_no_sys'])
+        assert decode(ns_mt_ids) == '<s> [INST] abc [/INST] ipsum</s> [INST] 123 [/INST] sit</s>'
+        assert ns_mt_ids == [1, 518, 25580, 29962, 25638, 518, 29914, 25580, 29962, 23421, 2, 518, 25580, 29962, 29871, 29896, 29906, 29941, 518, 29914, 25580, 29962, 7845, 2]
+        # fmt: on
+
+    def test_sharegpt_changes_roles(self):
+        conversation = {
+            "roles": ["USER", "CHARACTER"],
+            "conversations": [
+                {"from": "system", "value": "lorem"},
+                {"from": "gpt", "value": "ipsum"},
+                {"from": "human", "value": "dolor"},
+                {"from": "gpt", "value": "sit"},
+            ],
+        }
+        prompter = ShareGPTPrompterV2()
+        strat = ShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
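+        # The dataset supplies custom role names ("USER"/"CHARACTER"); the decoded
+        # prompt below is expected to contain them instead of the default roles.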
+        with self._caplog.at_level(logging.WARNING):
+            res = strat.tokenize_prompt(conversation)
+            assert "CHARACTER" in self.tokenizer.decode(res["input_ids"])
+
+    def test_sharegpt_assistant_label_ignore(self):
+        conversation = {
+            "roles": ["user", "assistant"],
+            "conversations": [
+                {"from": "system", "value": "lorem"},
+                {"from": "gpt", "value": "ipsum"},
+                {"from": "human", "value": "dolor"},
+                {"from": "gpt", "value": "sit"},
+            ],
+        }
+        prompter = ShareGPTPrompterV2()
+        strat = ShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        with self._caplog.at_level(logging.WARNING):
+            res = strat.tokenize_prompt(conversation)
+            idx = res["input_ids"].index(20255)  # assistant token
+            assert res["labels"][idx] == -100
+
+    def test_glaive_tool_label_ignore(self):
+        conversation = {
+            "system": "SYSTEM: This is a system prompt",
+            "chat": "USER: Can you book a flight for me from New York to London? ASSISTANT: I'm sorry, but I don't have the capability to book flights. <|endoftext|>",
+        }
+        prompter = ShareGPTPrompterV2()
+        strat = GlaiveShareGPTPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        with self._caplog.at_level(logging.WARNING):
+            res = strat.tokenize_prompt(conversation)
+            idx = res["input_ids"].index(13566)  # assistant token
+            assert res["labels"][idx] == -100
+
+    def test_no_sys_prompt(self):
+        """
+        tests the interface between the user and assistant parts
+        """
+        prompter = NoSystemPrompter()
+        # pylint: disable=duplicate-code
+        strat = AlpacaPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        sample = {
+            "instruction": "hello cruel. lorem ipsum dolor sit amet.",
+            "output": "world!",
+        }
+        example = strat.tokenize_prompt(sample)
+        world_idx = example["input_ids"].index(3186)
+        assert example["labels"][world_idx] == 3186
+        assert example["labels"][world_idx - 1] == -100
+
+    def test_alpaca(self):
+        """
+        tests the interface between the user and assistant parts
+        """
+        # pylint: disable=duplicate-code
+        prompter = AlpacaPrompter()
+        strat = AlpacaPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        sample = {"instruction": "hello!", "output": "Hi! How can I help?"}
+        example = strat.tokenize_prompt(sample)
+        world_idx = example["input_ids"].index(6324)
+        assert example["labels"][world_idx] == 6324
+        assert example["labels"][world_idx - 1] == -100
+
+
+class InstructionWSystemPromptTokenizingStrategyTest(unittest.TestCase):
+    """
+    Test class for prompt tokenization strategies with sys prompt from the dataset
+    """
+
+    def setUp(self) -> None:
+        # pylint: disable=duplicate-code
+        self.tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
+        self.tokenizer.add_special_tokens(
+            {
+                "bos_token": "<s>",
+                "eos_token": "</s>",
+                "unk_token": "<unk>",
+            }
+        )
+
+    def test_system_alpaca(self):
+        prompter = SystemDataPrompter(PromptStyle.CHAT.value)
+        strat = InstructionWSystemPromptTokenizingStrategy(
+            prompter,
+            self.tokenizer,
+            False,
+            2048,
+        )
+        sample = {
+            "system": "use cot",
+            "instruction": "hello!",
+            "output": "Hi! How can I help?",
How can I help?", + } + example = strat.tokenize_prompt(sample) + assert example["input_ids"][0:5] == [ + 1, + 28962, + 1254, + 12665, + 29901, + ] # "SYSTEM:" + assert example["input_ids"][5:7] == [671, 20118] # " use cot" + assert example["input_ids"][8] == 11889 # USER + + +class Llama2ChatTokenizationTest(unittest.TestCase): + """ + Test class for prompt tokenization strategies with sys prompt from the dataset + """ + + def setUp(self) -> None: + # pylint: disable=duplicate-code + self.tokenizer = LlamaTokenizer.from_pretrained("NousResearch/Llama-2-7b-hf") + # woraround because official Meta repos are not open + + def test_llama2_chat_integration(self): + with open( + Path(__file__).parent / "fixtures/conversation.json", encoding="utf-8" + ) as fin: + data = fin.read() + conversation = json.loads(data) + with open( + Path(__file__).parent / "fixtures/conversation.tokenized_llama2chat.json", + encoding="utf-8", + ) as fin: + data = fin.read() + tokenized_conversation = json.loads(data) + prompter = Llama2ChatPrompter() + strat = LLama2ChatTokenizingStrategy( + prompter, + self.tokenizer, + False, + 4096, + ) + example = strat.tokenize_prompt(conversation) + for fields in ["input_ids", "attention_mask", "labels"]: + self.assertEqual(len(example[fields]), len(tokenized_conversation[fields])) + self.assertEqual(example[fields], tokenized_conversation[fields]) + + def compare_with_transformers_integration(self): + # this needs transformers >= v4.31.0 + from transformers.models.llama.tokenization_llama import B_SYS, E_SYS + from transformers.pipelines.conversational import Conversation + + # from transformers.models.llama.tokenization_llama import DEFAULT_SYSTEM_PROMPT + # broken as of 23/7/20 + # see https://github.com/huggingface/transformers/pull/24935 + # pylint: disable=C0103 + DEFAULT_SYSTEM_PROMPT = """\ +You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. + +If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.""" + with open( + Path(__file__).parent / "fixtures/conversation.json", encoding="utf-8" + ) as fin: + data = fin.read() + conversation = json.loads(data) + with open( + Path(__file__).parent / "fixtures/conversation.tokenized_llama2chat.json", + encoding="utf-8", + ) as fin: + data = fin.read() + tokenized_conversation = json.loads(data) + + user_input = [] + answers = [] + for msg in conversation["conversations"]: + if msg["from"] == "human": + user_input.append(msg["value"]) + else: + answers.append(msg["value"]) + hf_conf = Conversation( + text=user_input[-1], + past_user_inputs=[B_SYS + DEFAULT_SYSTEM_PROMPT + E_SYS + user_input[0]] + + user_input[1:-1], + generated_responses=answers, + ) + # pylint: disable=W0212 + hf_tokens = self.tokenizer._build_conversation_input_ids(hf_conf) + + self.assertEqual( + hf_tokens, tokenized_conversation["input_ids"][: len(hf_tokens)] + ) + + +class OrpoTokenizationTest(unittest.TestCase): + """test case for the ORPO tokenization""" + + def setUp(self) -> None: + # pylint: disable=duplicate-code + tokenizer = LlamaTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1") + tokenizer.add_special_tokens( + { + "eos_token": AddedToken( + "<|im_end|>", rstrip=False, lstrip=False, normalized=False + ) + } + ) + tokenizer.add_tokens( + [ + AddedToken( + "<|im_start|>", rstrip=False, lstrip=False, normalized=False + ), + ] + ) + self.tokenizer = tokenizer + self.dataset = load_dataset( + "argilla/ultrafeedback-binarized-preferences-cleaned", split="train" + ).select([0]) + + def test_orpo_integration(self): + strat = load( + self.tokenizer, + DictDefault({"train_on_inputs": False}), + DictDefault({"chat_template": "chatml"}), + ) + res = strat.tokenize_prompt(self.dataset[0]) + assert "rejected_input_ids" in res + assert "rejected_labels" in res + assert "input_ids" in res + assert "labels" in res + assert "prompt_attention_mask" in res + + assert len(res["rejected_input_ids"]) == len(res["rejected_labels"]) + assert len(res["input_ids"]) == len(res["labels"]) + assert len(res["input_ids"]) == len(res["prompt_attention_mask"]) + + assert res["rejected_labels"][0] == -100 + assert res["rejected_input_ids"][-1] == res["rejected_labels"][-1] + + assert res["labels"][0] == -100 + assert res["input_ids"][-1] == res["labels"][-1] + + assert res["prompt_attention_mask"][0] == 1 + assert res["prompt_attention_mask"][-1] == 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_prompters.py b/tests/test_prompters.py new file mode 100644 index 0000000000000000000000000000000000000000..6c5b8f27c2e9c0582adbe152bfca1e4b581f35b3 --- /dev/null +++ b/tests/test_prompters.py @@ -0,0 +1,124 @@ +"""Module testing prompters""" + +import unittest + +from axolotl.prompt_strategies.alpaca_w_system import SystemDataPrompter +from axolotl.prompters import ( + AlpacaPrompter, + MultipleChoiceExplainPrompter, + PromptStyle, + UnpromptedPrompter, +) + + +class AlpacaPrompterTest(unittest.TestCase): + """ + Test AlpacaPrompter + """ + + def test_prompt_style_w_none(self): + prompter = AlpacaPrompter(prompt_style=None) + res = next(prompter.build_prompt("tell me a joke")) + # just testing that it uses instruct style + assert "### Instruction:" in res + + def test_prompt_style_w_instruct(self): + prompter = AlpacaPrompter(prompt_style=PromptStyle.INSTRUCT.value) + res = next( + prompter.build_prompt("tell me a joke about the following", "alpacas") + ) + assert "Below is an instruction" in 
res + assert "### Instruction:" in res + assert "### Input:" in res + assert "alpacas" in res + assert "### Response:" in res + assert "USER:" not in res + assert "ASSISTANT:" not in res + res = next(prompter.build_prompt("tell me a joke about the following")) + assert "Below is an instruction" in res + assert "### Instruction:" in res + assert "### Input:" not in res + assert "### Response:" in res + assert "USER:" not in res + assert "ASSISTANT:" not in res + + def test_prompt_style_w_chat(self): + prompter = AlpacaPrompter(prompt_style=PromptStyle.CHAT.value) + res = next( + prompter.build_prompt("tell me a joke about the following", "alpacas") + ) + assert "Below is an instruction" in res + assert "### Instruction:" not in res + assert "### Input:" not in res + assert "alpacas" in res + assert "### Response:" not in res + assert "USER:" in res + assert "ASSISTANT:" in res + res = next(prompter.build_prompt("tell me a joke about the following")) + assert "Below is an instruction" in res + assert "### Instruction:" not in res + assert "### Input:" not in res + assert "### Response:" not in res + assert "USER:" in res + assert "ASSISTANT:" in res + + def test_system_prompt(self): + prompter = SystemDataPrompter(prompt_style=PromptStyle.CHAT.value) + res = next( + prompter.build_prompt_w_system( + "use cot", "tell me a joke about the following", "alpacas" + ) + ) + assert "use cot" in res + assert res.startswith("SYSTEM:") + assert "### Instruction:" not in res + assert "### Input:" not in res + assert "alpacas" in res + assert "### Response:" not in res + assert "USER:" in res + assert "ASSISTANT:" in res + + +class UnpromptedPrompterTest(unittest.TestCase): + """ + Test class for UnpromptedPrompter with no system prompts + """ + + def test_prompt_style_w_none(self): + prompter = UnpromptedPrompter(prompt_style=None) + res = next(prompter.build_prompt("tell me a joke")) + assert "### Instruction:" in res + assert "tell me a joke" in res + assert res.startswith("###") + + def test_prompt_style_w_instruct(self): + prompter = UnpromptedPrompter(prompt_style=PromptStyle.INSTRUCT.value) + res = next( + prompter.build_prompt("tell me a joke about the following", "alpacas") + ) + assert "### Instruction:" in res + assert "tell me a joke" in res + assert res.startswith("###") + + def test_prompt_style_w_chat(self): + prompter = UnpromptedPrompter(prompt_style=PromptStyle.CHAT.value) + res = next( + prompter.build_prompt("tell me a joke about the following", "alpacas") + ) + assert "USER:" in res + assert "tell me a joke" in res + assert res.startswith("USER:") + + +class MultipleChoiceExplainPrompterTest(unittest.TestCase): + """ + Test class for MultipleChoiceExplainPrompter + """ + + def test_prompt_style_w_chat(self): + prompter = MultipleChoiceExplainPrompter(prompt_style=PromptStyle.CHAT.value) + res = next(prompter.build_prompt("choose one", "- A\n- B\n- C", "C")) + assert "USER:" in res + assert "choose one" in res + assert "Choose the answer that best answers the question." 
in res
+        assert "- A\n- B\n- C" in res
diff --git a/tests/test_schedulers.py b/tests/test_schedulers.py
new file mode 100644
index 0000000000000000000000000000000000000000..9402d7af7fd59f8aff67db13cb3bf6ddbfce3328
--- /dev/null
+++ b/tests/test_schedulers.py
@@ -0,0 +1,52 @@
+"""
+test module for the axolotl.utils.schedulers module
+"""
+import unittest
+
+import torch
+from torch.optim import SGD
+
+from axolotl.utils.schedulers import get_cosine_schedule_with_warmup_decay_constant
+
+
+class TestCosineConstantLr(unittest.TestCase):
+    """
+    test class for the cosine schedule with warmup and constant minimum LR
+    """
+
+    def setUp(self):
+        self.train_steps = 1000
+        self.warmup_steps = 10
+        self.min_lr_ratio = 0.1
+        self.constant_lr_ratio = 0.8
+        self._lr = 0.01
+        self.optimizer = SGD([torch.tensor(1)], lr=self._lr)
+        self.lr_scheduler = get_cosine_schedule_with_warmup_decay_constant(  # pylint: disable=attribute-defined-outside-init
+            self.optimizer,
+            num_warmup_steps=self.warmup_steps,
+            num_training_steps=self.train_steps,
+            min_lr_ratio=self.min_lr_ratio,
+            constant_lr_ratio=self.constant_lr_ratio,
+        )
+
+    def test_schedulers(self):
+        self.assertEqual(self.lr_scheduler.get_last_lr()[0], 0)
+        for _ in range(self.warmup_steps):
+            self.lr_scheduler.step()
+        self.assertEqual(self.lr_scheduler.get_last_lr()[0], self._lr)
+        constant_step = int(self.train_steps * self.constant_lr_ratio)
+        remaining_step = self.train_steps - constant_step
+        for _ in range(constant_step):
+            self.lr_scheduler.step()
+        self.assertEqual(
+            self.lr_scheduler.get_last_lr()[0], self._lr * self.min_lr_ratio
+        )
+        for _ in range(remaining_step):
+            self.lr_scheduler.step()
+        self.assertEqual(
+            self.lr_scheduler.get_last_lr()[0], self._lr * self.min_lr_ratio
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_tokenizers.py b/tests/test_tokenizers.py
new file mode 100644
index 0000000000000000000000000000000000000000..69c441f8c646e410e01e9040708dbe6c0db50af6
--- /dev/null
+++ b/tests/test_tokenizers.py
@@ -0,0 +1,87 @@
+"""
+Test cases for the tokenizer loading
+"""
+import unittest
+
+import pytest
+
+from axolotl.utils.dict import DictDefault
+from axolotl.utils.models import load_tokenizer
+
+
+class TestTokenizers(unittest.TestCase):
+    """
+    test class for the load_tokenizer fn
+    """
+
+    def test_default_use_fast(self):
+        cfg = DictDefault(
+            {
+                "tokenizer_config": "huggyllama/llama-7b",
+            }
+        )
+        tokenizer = load_tokenizer(cfg)
+        assert "Fast" in tokenizer.__class__.__name__
+
+    def test_dont_use_fast(self):
+        cfg = DictDefault(
+            {
+                "tokenizer_config": "huggyllama/llama-7b",
+                "tokenizer_use_fast": False,
+            }
+        )
+        tokenizer = load_tokenizer(cfg)
+        assert "Fast" not in tokenizer.__class__.__name__
+
+    def test_special_tokens_modules_to_save(self):
+        # setting special_tokens to new token
+        cfg = DictDefault(
+            {
+                "tokenizer_config": "huggyllama/llama-7b",
+                "adapter": "lora",
+                "special_tokens": {"bos_token": "[INST]"},
+            }
+        )
+        with pytest.raises(
+            ValueError,
+            match=r".*Please set lora_modules_to_save*",
+        ):
+            load_tokenizer(cfg)
+
+        # setting special_tokens but not changing from default
+        cfg = DictDefault(
+            {
+                "tokenizer_config": "huggyllama/llama-7b",
+                "adapter": "lora",
+                "special_tokens": {"bos_token": "<s>"},
+            }
+        )
+        load_tokenizer(cfg)
+
+        # non-adapter setting special_tokens
+        cfg = DictDefault(
+            {
+                "tokenizer_config": "huggyllama/llama-7b",
+                "special_tokens": {"bos_token": "[INST]"},
+            }
+        )
+        load_tokenizer(cfg)
+
+    def test_add_additional_special_tokens(self):
+        cfg = DictDefault(
+            {
+
"tokenizer_config": "huggyllama/llama-7b", + "special_tokens": {"additional_special_tokens": ["<|im_start|>"]}, + } + ) + tokenizer = load_tokenizer(cfg) + self.assertEqual(tokenizer("<|im_start|>user")["input_ids"], [1, 32000, 1404]) + self.assertEqual(len(tokenizer), 32001) + + # ensure reloading the tokenizer again from cfg results in same vocab length + tokenizer = load_tokenizer(cfg) + self.assertEqual(len(tokenizer), 32001) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..70dbc750e68fc7ef34fe9bf5f9cec89c5f16d794 --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,1271 @@ +# pylint: disable=too-many-lines +"""Module for testing the validation module""" + +import logging +import os +import warnings +from typing import Optional + +import pytest +from pydantic import ValidationError + +from axolotl.utils.config import validate_config +from axolotl.utils.config.models.input.v0_4_1 import AxolotlConfigWCapabilities +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import check_model_config +from axolotl.utils.wandb_ import setup_wandb_env_vars + +warnings.filterwarnings("error") + + +@pytest.fixture(name="minimal_cfg") +def fixture_cfg(): + return DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + } + ) + + +class BaseValidation: + """ + Base validation module to setup the log capture + """ + + _caplog: Optional[pytest.LogCaptureFixture] = None + + @pytest.fixture(autouse=True) + def inject_fixtures(self, caplog): + self._caplog = caplog + + +# pylint: disable=too-many-public-methods +class TestValidation(BaseValidation): + """ + Test the validation module + """ + + def test_datasets_min_length(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [], + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + } + ) + + with pytest.raises( + ValidationError, + match=r".*List should have at least 1 item after validation*", + ): + validate_config(cfg) + + def test_datasets_min_length_empty(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + } + ) + + with pytest.raises( + ValueError, match=r".*either datasets or pretraining_dataset is required*" + ): + validate_config(cfg) + + def test_pretrain_dataset_min_length(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "pretraining_dataset": [], + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "max_steps": 100, + } + ) + + with pytest.raises( + ValidationError, + match=r".*List should have at least 1 item after validation*", + ): + validate_config(cfg) + + def test_valid_pretrain_dataset(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "pretraining_dataset": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + "max_steps": 100, + } + ) + + validate_config(cfg) + + def test_valid_sft_dataset(self): + cfg = DictDefault( + { + "base_model": 
"TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "micro_batch_size": 1, + "gradient_accumulation_steps": 1, + } + ) + + validate_config(cfg) + + def test_batch_size_unused_warning(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "micro_batch_size": 4, + "batch_size": 32, + } + ) + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert "batch_size is not recommended" in self._caplog.records[0].message + + def test_batch_size_more_params(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "batch_size": 32, + } + ) + + with pytest.raises(ValueError, match=r".*At least two of*"): + validate_config(cfg) + + def test_lr_as_float(self, minimal_cfg): + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "learning_rate": "5e-5", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + + assert new_cfg.learning_rate == 0.00005 + + def test_model_config_remap(self, minimal_cfg): + cfg = ( + DictDefault( + { + "model_config": {"model_type": "mistral"}, + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + assert new_cfg.overrides_of_model_config["model_type"] == "mistral" + + def test_model_type_remap(self, minimal_cfg): + cfg = ( + DictDefault( + { + "model_type": "AutoModelForCausalLM", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + assert new_cfg.type_of_model == "AutoModelForCausalLM" + + def test_model_revision_remap(self, minimal_cfg): + cfg = ( + DictDefault( + { + "model_revision": "main", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + assert new_cfg.revision_of_model == "main" + + def test_qlora(self, minimal_cfg): + base_cfg = ( + DictDefault( + { + "adapter": "qlora", + } + ) + | minimal_cfg + ) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "load_in_8bit": True, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*8bit.*"): + validate_config(cfg) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "gptq": True, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*gptq.*"): + validate_config(cfg) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "load_in_4bit": False, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*4bit.*"): + validate_config(cfg) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "load_in_4bit": True, + } + ) + | base_cfg + ) + + validate_config(cfg) + + def test_qlora_merge(self, minimal_cfg): + base_cfg = ( + DictDefault( + { + "adapter": "qlora", + "merge_lora": True, + } + ) + | minimal_cfg + ) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "load_in_8bit": True, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*8bit.*"): + validate_config(cfg) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "gptq": True, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*gptq.*"): + validate_config(cfg) + + cfg = ( + DictDefault( # pylint: disable=unsupported-binary-operation + { + "load_in_4bit": 
True, + } + ) + | base_cfg + ) + + with pytest.raises(ValueError, match=r".*4bit.*"): + validate_config(cfg) + + def test_hf_use_auth_token(self, minimal_cfg): + cfg = ( + DictDefault( + { + "push_dataset_to_hub": "namespace/repo", + } + ) + | minimal_cfg + ) + + with pytest.raises(ValueError, match=r".*hf_use_auth_token.*"): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "push_dataset_to_hub": "namespace/repo", + "hf_use_auth_token": True, + } + ) + | minimal_cfg + ) + validate_config(cfg) + + def test_gradient_accumulations_or_batch_size(self): + cfg = DictDefault( + { + "base_model": "TinyLlama/TinyLlama-1.1B-Chat-v0.6", + "learning_rate": 0.000001, + "datasets": [ + { + "path": "mhenrichsen/alpaca_2k_test", + "type": "alpaca", + } + ], + "gradient_accumulation_steps": 1, + "batch_size": 1, + } + ) + + with pytest.raises( + ValueError, match=r".*gradient_accumulation_steps or batch_size.*" + ): + validate_config(cfg) + + def test_falcon_fsdp(self, minimal_cfg): + regex_exp = r".*FSDP is not supported for falcon models.*" + + # Check for lower-case + cfg = ( + DictDefault( + { + "base_model": "tiiuae/falcon-7b", + "fsdp": ["full_shard", "auto_wrap"], + } + ) + | minimal_cfg + ) + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) + + # Check for upper-case + cfg = ( + DictDefault( + { + "base_model": "Falcon-7b", + "fsdp": ["full_shard", "auto_wrap"], + } + ) + | minimal_cfg + ) + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "base_model": "tiiuae/falcon-7b", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_mpt_gradient_checkpointing(self, minimal_cfg): + regex_exp = r".*gradient_checkpointing is not supported for MPT models*" + + # Check for lower-case + cfg = ( + DictDefault( + { + "base_model": "mosaicml/mpt-7b", + "gradient_checkpointing": True, + } + ) + | minimal_cfg + ) + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) + + def test_flash_optimum(self, minimal_cfg): + cfg = ( + DictDefault( + { + "flash_optimum": True, + "adapter": "lora", + "bf16": False, + } + ) + | minimal_cfg + ) + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "BetterTransformers probably doesn't work with PEFT adapters" + in record.message + for record in self._caplog.records + ) + + cfg = ( + DictDefault( + { + "flash_optimum": True, + "bf16": False, + } + ) + | minimal_cfg + ) + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "probably set bfloat16 or float16" in record.message + for record in self._caplog.records + ) + + cfg = ( + DictDefault( + { + "flash_optimum": True, + "fp16": True, + } + ) + | minimal_cfg + ) + regex_exp = r".*AMP is not supported.*" + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "flash_optimum": True, + "bf16": True, + } + ) + | minimal_cfg + ) + regex_exp = r".*AMP is not supported.*" + + with pytest.raises(ValueError, match=regex_exp): + validate_config(cfg) + + def test_adamw_hyperparams(self, minimal_cfg): + cfg = ( + DictDefault( + { + "optimizer": None, + "adam_epsilon": 0.0001, + } + ) + | minimal_cfg + ) + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "adamw hyperparameters found, but no adamw optimizer set" + in record.message + for record in self._caplog.records + ) + + cfg = ( + DictDefault( + { + "optimizer": "adafactor", + "adam_beta1": 0.0001, + } + ) + | 
minimal_cfg + ) + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "adamw hyperparameters found, but no adamw optimizer set" + in record.message + for record in self._caplog.records + ) + + cfg = ( + DictDefault( + { + "optimizer": "adamw_bnb_8bit", + "adam_beta1": 0.9, + "adam_beta2": 0.99, + "adam_epsilon": 0.0001, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "optimizer": "adafactor", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_deprecated_packing(self, minimal_cfg): + cfg = ( + DictDefault( + { + "max_packed_sequence_len": 1024, + } + ) + | minimal_cfg + ) + with pytest.raises( + DeprecationWarning, + match=r"`max_packed_sequence_len` is no longer supported", + ): + validate_config(cfg) + + def test_packing(self, minimal_cfg): + cfg = ( + DictDefault( + { + "sample_packing": True, + "pad_to_sequence_len": None, + } + ) + | minimal_cfg + ) + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert any( + "`pad_to_sequence_len: true` is recommended when using sample_packing" + in record.message + for record in self._caplog.records + ) + + def test_merge_lora_no_bf16_fail(self, minimal_cfg): + """ + This is assumed to be run on a CPU machine, so bf16 is not supported. + """ + + cfg = ( + DictDefault( + { + "bf16": True, + "capabilities": {"bf16": False}, + } + ) + | minimal_cfg + ) + + with pytest.raises(ValueError, match=r".*AMP is not supported on this GPU*"): + AxolotlConfigWCapabilities(**cfg.to_dict()) + + cfg = ( + DictDefault( + { + "bf16": True, + "merge_lora": True, + "capabilities": {"bf16": False}, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_sharegpt_deprecation(self, minimal_cfg): + cfg = ( + DictDefault( + {"datasets": [{"path": "lorem/ipsum", "type": "sharegpt:chat"}]} + ) + | minimal_cfg + ) + with self._caplog.at_level(logging.WARNING): + new_cfg = validate_config(cfg) + assert any( + "`type: sharegpt:chat` will soon be deprecated." in record.message + for record in self._caplog.records + ) + assert new_cfg.datasets[0].type == "sharegpt" + + cfg = ( + DictDefault( + { + "datasets": [ + {"path": "lorem/ipsum", "type": "sharegpt_simple:load_role"} + ] + } + ) + | minimal_cfg + ) + with self._caplog.at_level(logging.WARNING): + new_cfg = validate_config(cfg) + assert any( + "`type: sharegpt_simple` will soon be deprecated." 
in record.message + for record in self._caplog.records + ) + assert new_cfg.datasets[0].type == "sharegpt:load_role" + + def test_no_conflict_save_strategy(self, minimal_cfg): + cfg = ( + DictDefault( + { + "save_strategy": "epoch", + "save_steps": 10, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, match=r".*save_strategy and save_steps mismatch.*" + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "save_strategy": "no", + "save_steps": 10, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, match=r".*save_strategy and save_steps mismatch.*" + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "save_strategy": "steps", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "save_strategy": "steps", + "save_steps": 10, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "save_steps": 10, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "save_strategy": "no", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_no_conflict_eval_strategy(self, minimal_cfg): + cfg = ( + DictDefault( + { + "evaluation_strategy": "epoch", + "eval_steps": 10, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, match=r".*evaluation_strategy and eval_steps mismatch.*" + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "no", + "eval_steps": 10, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, match=r".*evaluation_strategy and eval_steps mismatch.*" + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "steps", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "steps", + "eval_steps": 10, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "eval_steps": 10, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "no", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "epoch", + "val_set_size": 0, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*eval_steps and evaluation_strategy are not supported with val_set_size == 0.*", + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "eval_steps": 10, + "val_set_size": 0, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*eval_steps and evaluation_strategy are not supported with val_set_size == 0.*", + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "val_set_size": 0, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "eval_steps": 10, + "val_set_size": 0.01, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "evaluation_strategy": "epoch", + "val_set_size": 0.01, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_eval_table_size_conflict_eval_packing(self, minimal_cfg): + cfg = ( + DictDefault( + { + "sample_packing": True, + "eval_table_size": 100, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, match=r".*Please set 'eval_sample_packing' to false.*" + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "sample_packing": True, + "eval_sample_packing": False, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "sample_packing": False, + "eval_table_size": 100, + } + ) + | minimal_cfg + ) + + 
validate_config(cfg) + + cfg = ( + DictDefault( + { + "sample_packing": True, + "eval_table_size": 100, + "eval_sample_packing": False, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_load_in_x_bit_without_adapter(self, minimal_cfg): + cfg = ( + DictDefault( + { + "load_in_4bit": True, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*load_in_8bit and load_in_4bit are not supported without setting an adapter.*", + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "load_in_8bit": True, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*load_in_8bit and load_in_4bit are not supported without setting an adapter.*", + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "load_in_4bit": True, + "adapter": "qlora", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "load_in_8bit": True, + "adapter": "lora", + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_warmup_step_no_conflict(self, minimal_cfg): + cfg = ( + DictDefault( + { + "warmup_steps": 10, + "warmup_ratio": 0.1, + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*warmup_steps and warmup_ratio are mutually exclusive*", + ): + validate_config(cfg) + + cfg = ( + DictDefault( + { + "warmup_steps": 10, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + cfg = ( + DictDefault( + { + "warmup_ratio": 0.1, + } + ) + | minimal_cfg + ) + + validate_config(cfg) + + def test_unfrozen_parameters_w_peft_layers_to_transform(self, minimal_cfg): + cfg = ( + DictDefault( + { + "adapter": "lora", + "unfrozen_parameters": [ + "model.layers.2[0-9]+.block_sparse_moe.gate.*" + ], + "peft_layers_to_transform": [0, 1], + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*can have unexpected behavior*", + ): + validate_config(cfg) + + def test_hub_model_id_save_value_warns(self, minimal_cfg): + cfg = DictDefault({"hub_model_id": "test"}) | minimal_cfg + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert ( + "set without any models being saved" in self._caplog.records[0].message + ) + + def test_hub_model_id_save_value(self, minimal_cfg): + cfg = DictDefault({"hub_model_id": "test", "saves_per_epoch": 4}) | minimal_cfg + + with self._caplog.at_level(logging.WARNING): + validate_config(cfg) + assert len(self._caplog.records) == 0 + + +class TestValidationCheckModelConfig(BaseValidation): + """ + Test the validation for the config when the model config is available + """ + + def test_llama_add_tokens_adapter(self, minimal_cfg): + cfg = ( + DictDefault( + {"adapter": "qlora", "load_in_4bit": True, "tokens": ["<|imstart|>"]} + ) + | minimal_cfg + ) + model_config = DictDefault({"model_type": "llama"}) + + with pytest.raises( + ValueError, + match=r".*`lora_modules_to_save` not properly set when adding new tokens*", + ): + check_model_config(cfg, model_config) + + cfg = ( + DictDefault( + { + "adapter": "qlora", + "load_in_4bit": True, + "tokens": ["<|imstart|>"], + "lora_modules_to_save": ["embed_tokens"], + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*`lora_modules_to_save` not properly set when adding new tokens*", + ): + check_model_config(cfg, model_config) + + cfg = ( + DictDefault( + { + "adapter": "qlora", + "load_in_4bit": True, + "tokens": ["<|imstart|>"], + "lora_modules_to_save": ["embed_tokens", "lm_head"], + } + ) + | minimal_cfg + ) + + check_model_config(cfg, model_config) + + def 
test_phi_add_tokens_adapter(self, minimal_cfg): + cfg = ( + DictDefault( + {"adapter": "qlora", "load_in_4bit": True, "tokens": ["<|imstart|>"]} + ) + | minimal_cfg + ) + model_config = DictDefault({"model_type": "phi"}) + + with pytest.raises( + ValueError, + match=r".*`lora_modules_to_save` not properly set when adding new tokens*", + ): + check_model_config(cfg, model_config) + + cfg = ( + DictDefault( + { + "adapter": "qlora", + "load_in_4bit": True, + "tokens": ["<|imstart|>"], + "lora_modules_to_save": ["embd.wte", "lm_head.linear"], + } + ) + | minimal_cfg + ) + + with pytest.raises( + ValueError, + match=r".*`lora_modules_to_save` not properly set when adding new tokens*", + ): + check_model_config(cfg, model_config) + + cfg = ( + DictDefault( + { + "adapter": "qlora", + "load_in_4bit": True, + "tokens": ["<|imstart|>"], + "lora_modules_to_save": ["embed_tokens", "lm_head"], + } + ) + | minimal_cfg + ) + + check_model_config(cfg, model_config) + + +class TestValidationWandb(BaseValidation): + """ + Validation test for wandb + """ + + def test_wandb_set_run_id_to_name(self, minimal_cfg): + cfg = ( + DictDefault( + { + "wandb_run_id": "foo", + } + ) + | minimal_cfg + ) + + with self._caplog.at_level(logging.WARNING): + new_cfg = validate_config(cfg) + assert any( + "wandb_run_id sets the ID of the run. If you would like to set the name, please use wandb_name instead." + in record.message + for record in self._caplog.records + ) + + assert new_cfg.wandb_name == "foo" and new_cfg.wandb_run_id == "foo" + + cfg = ( + DictDefault( + { + "wandb_name": "foo", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + + assert new_cfg.wandb_name == "foo" and new_cfg.wandb_run_id is None + + def test_wandb_sets_env(self, minimal_cfg): + cfg = ( + DictDefault( + { + "wandb_project": "foo", + "wandb_name": "bar", + "wandb_run_id": "bat", + "wandb_entity": "baz", + "wandb_mode": "online", + "wandb_watch": "false", + "wandb_log_model": "checkpoint", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + + setup_wandb_env_vars(new_cfg) + + assert os.environ.get("WANDB_PROJECT", "") == "foo" + assert os.environ.get("WANDB_NAME", "") == "bar" + assert os.environ.get("WANDB_RUN_ID", "") == "bat" + assert os.environ.get("WANDB_ENTITY", "") == "baz" + assert os.environ.get("WANDB_MODE", "") == "online" + assert os.environ.get("WANDB_WATCH", "") == "false" + assert os.environ.get("WANDB_LOG_MODEL", "") == "checkpoint" + assert os.environ.get("WANDB_DISABLED", "") != "true" + + os.environ.pop("WANDB_PROJECT", None) + os.environ.pop("WANDB_NAME", None) + os.environ.pop("WANDB_RUN_ID", None) + os.environ.pop("WANDB_ENTITY", None) + os.environ.pop("WANDB_MODE", None) + os.environ.pop("WANDB_WATCH", None) + os.environ.pop("WANDB_LOG_MODEL", None) + os.environ.pop("WANDB_DISABLED", None) + + def test_wandb_set_disabled(self, minimal_cfg): + cfg = DictDefault({}) | minimal_cfg + + new_cfg = validate_config(cfg) + + setup_wandb_env_vars(new_cfg) + + assert os.environ.get("WANDB_DISABLED", "") == "true" + + cfg = ( + DictDefault( + { + "wandb_project": "foo", + } + ) + | minimal_cfg + ) + + new_cfg = validate_config(cfg) + + setup_wandb_env_vars(new_cfg) + + assert os.environ.get("WANDB_DISABLED", "") != "true" + + os.environ.pop("WANDB_PROJECT", None) + os.environ.pop("WANDB_DISABLED", None) diff --git a/tests/utils/test_models.py b/tests/utils/test_models.py new file mode 100644 index 0000000000000000000000000000000000000000..e06bb6c250d3ab73edddc65cb3e83c713e81339a --- /dev/null +++ 
b/tests/utils/test_models.py @@ -0,0 +1,37 @@ +"""Module for testing models utils file.""" + + +import unittest +from unittest.mock import patch + +import pytest + +from axolotl.utils.dict import DictDefault +from axolotl.utils.models import load_model + + +class ModelsUtilsTest(unittest.TestCase): + """Testing module for models utils.""" + + def test_cfg_throws_error_with_s2_attention_and_sample_packing(self): + cfg = DictDefault( + { + "s2_attention": True, + "sample_packing": True, + "base_model": "", + "model_type": "LlamaForCausalLM", + } + ) + + # Mock out call to HF hub + with patch( + "axolotl.utils.models.load_model_config" + ) as mocked_load_model_config: + mocked_load_model_config.return_value = {} + with pytest.raises(ValueError) as exc: + # Should error before hitting tokenizer, so we pass in an empty str + load_model(cfg, tokenizer="") + assert ( + "shifted-sparse attention does not currently support sample packing" + in str(exc.value) + )