name: Self-hosted runner AMD GPU (push)

# Reusable workflow: it is only started through `workflow_call`.
# The caller must pass `gpu_flavor`, which this file uses as an extra
# `runs-on` label for the self-hosted jobs and in the Slack report title.
on:
  workflow_call:
    inputs:
      gpu_flavor:
        required: true
        type: string

# Environment shared by every job in this workflow.
# Environment variables are always strings, so the boolean-looking values are
# quoted to keep any YAML parser from re-typing them.
env:
  HF_HOME: /mnt/cache
  TRANSFORMERS_IS_CI: "yes"
  OMP_NUM_THREADS: 8
  MKL_NUM_THREADS: 8
  PYTEST_TIMEOUT: 60
  TF_FORCE_GPU_ALLOW_GROWTH: "true"
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  # Runs on a GitHub-hosted machine and invokes
  # `utils/check_self_hosted_runner.py` for the named target runner.
  # Every other job depends (directly or transitively) on this one.
  check_runner_status:
    name: Check Runner Status
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout transformers
        uses: actions/checkout@v4
        with:
          fetch-depth: 2

      - name: Check Runner Status
        # The token reaches the script through the environment rather than
        # being expanded into the shell text by the expression engine.
        env:
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
        run: >-
          python utils/check_self_hosted_runner.py
          --target_runners amd-mi210-single-gpu-ci-runner-docker
          --token "$ACCESS_REPO_INFO_TOKEN"

  # Starts the CI container on each self-hosted runner flavour and prints
  # GPU information, so a broken runner shows up before tests are scheduled.
  check_runners:
    name: Check Runners
    needs: check_runner_status
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: ROCM-SMI
        run: |
          rocm-smi

      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14

      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

  # Checks out the commit under test inside the container, runs the test
  # fetcher, and publishes two job outputs: `matrix` (the keys of
  # test_map.json) and `test_map` (its full content).
  setup_gpu:
    name: Setup
    needs: check_runners
    strategy:
      matrix:
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      test_map: ${{ steps.set-matrix.outputs.test_map }}
    env:
      # Branch / SHA candidates read from the triggering event payload.
      # The `*_PUSH` values are preferred; the `*_WORKFLOW_RUN` values are
      # the fallback when the `*_PUSH` ones are empty (see the first step).
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports `CI_BRANCH` and `CI_SHA` to the following steps via GITHUB_ENV.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          # Strip the `refs/heads/` prefix so only the branch name remains.
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          if [[ -n "$CI_BRANCH_PUSH" ]]; then
            echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi
          if [[ -n "$CI_SHA_PUSH" ]]; then
            echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi

      # The values are read as shell variables (exported through GITHUB_ENV
      # above) instead of `${{ env.* }}` so that an unusual branch name is
      # never interpreted as shell syntax.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          # `${VAR:+"$VAR"}` expands to the quoted value, or to nothing at all
          # when the variable is empty, so an empty value still results in a
          # plain `git checkout` as before.
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      # Remove Python caches and the reports directory found in the
      # container's /transformers clone.
      - name: Cleanup
        working-directory: /transformers
        run: |
          rm -rf tests/__pycache__
          rm -rf tests/models/__pycache__
          rm -rf reports

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # The fetcher output is printed and also saved to test_preparation.txt,
      # which the next step uploads.
      - name: Fetch the tests to run
        working-directory: /transformers
        run: |
          pip install --upgrade git-python
          python3 utils/tests_fetcher.py --diff_with_last_commit | tee test_preparation.txt

      - name: Report fetched tests
        uses: actions/upload-artifact@v4
        with:
          name: test_fetched
          path: /transformers/test_preparation.txt
          # This job runs once per `machine_type`, and v4 artifacts cannot be
          # uploaded twice under the same name; let the later leg replace the
          # earlier upload instead of failing.
          overwrite: true

      # Emits the keys of test_map.json as `matrix` and the whole mapping as
      # `test_map`. When the file does not exist a single placeholder key
      # `dummy` is emitted, which `run_models_gpu` uses to skip itself.
      # The values are printed with Python's `print`, i.e. in Python literal
      # form; later jobs read them with `fromJson`.
      - id: set-matrix
        name: Organize tests into models
        working-directory: /transformers
        run: |
          if [ -f test_map.json ]; then
              keys=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); d = list(test_map.keys()); print(d)')
              test_map=$(python3 -c 'import json; fp = open("test_map.json"); test_map = json.load(fp); fp.close(); print(test_map)')
          else
              keys=$(python3 -c 'keys = ["dummy"]; print(keys)')
              test_map=$(python3 -c 'test_map = {"dummy": []}; print(test_map)')
          fi
          # Quoted so that brackets, braces and wildcards in the values are
          # printed verbatim rather than being split or glob-expanded.
          echo "$keys"
          echo "$test_map"
          echo "matrix=$keys" >> "$GITHUB_OUTPUT"
          echo "test_map=$test_map" >> "$GITHUB_OUTPUT"

  # Runs the selected tests: one job per (test folder, machine type) pair.
  run_models_gpu:
    name: Model tests
    needs: setup_gpu
    # Skip entirely when the setup job produced only the `dummy` placeholder.
    if: contains(fromJson(needs.setup_gpu.outputs.matrix), 'dummy') != true
    strategy:
      fail-fast: false
      matrix:
        folders: ${{ fromJson(needs.setup_gpu.outputs.matrix) }}
        machine_type: [single-gpu, multi-gpu]
    runs-on: [self-hosted, amd-gpu, '${{ matrix.machine_type }}', '${{ inputs.gpu_flavor }}']
    container:
      image: huggingface/transformers-pytorch-amd-gpu-push-ci
      options: --device /dev/kfd --device /dev/dri --env ROCR_VISIBLE_DEVICES --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    env:
      # Same branch / SHA candidates as in `setup_gpu`.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      # Exports `CI_BRANCH` and `CI_SHA` to the following steps via GITHUB_ENV.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          # Strip the `refs/heads/` prefix so only the branch name remains.
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          if [[ -n "$CI_BRANCH_PUSH" ]]; then
            echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi
          if [[ -n "$CI_SHA_PUSH" ]]; then
            echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi

      # Read as shell variables so the values are never parsed as shell code.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - name: Update clone using environment variables
        working-directory: /transformers
        run: |
          echo "original branch = $(git branch --show-current)"
          # Expands to nothing when the variable is empty, so an empty value
          # still results in a plain `git checkout`.
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - name: Reinstall transformers in edit mode (remove the one installed during docker image build)
        working-directory: /transformers
        run: python3 -m pip uninstall -y transformers && python3 -m pip install -e .

      # Exports `matrix_folders`: the folder name with the first `models/`
      # replaced by `models_`. It is used below in the artifact name
      # (presumably because artifact names may not contain `/` - confirm).
      - name: Echo folder ${{ matrix.folders }}
        shell: bash
        run: |
          echo "${{ matrix.folders }}"
          echo "${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }}"
          matrix_folders="${{ matrix.folders }}"
          matrix_folders=${matrix_folders/'models/'/'models_'}
          echo "$matrix_folders"
          echo "matrix_folders=$matrix_folders" >> "$GITHUB_ENV"

      - name: ROCM-SMI
        run: |
          rocm-smi

      - name: ROCM-INFO
        run: |
          rocminfo | grep "Agent" -A 14

      - name: Show ROCR environment
        run: |
          echo "ROCR: $ROCR_VISIBLE_DEVICES"

      - name: Environment
        working-directory: /transformers
        run: |
          python3 utils/print_env.py

      - name: Show installed libraries and their versions
        working-directory: /transformers
        run: pip freeze

      # The test paths come from the `test_map` entry for this folder; the
      # report directory name is built from the unmodified `matrix.folders`.
      - name: Run all non-slow selected tests on GPU
        working-directory: /transformers
        run: |
          python3 -m pytest -n 2 --dist=loadfile -v --make-reports=${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports ${{ fromJson(needs.setup_gpu.outputs.test_map)[matrix.folders] }} -m "not not_device_test"

      - name: Failure short reports
        if: ${{ failure() }}
        continue-on-error: true
        run: cat /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt

      # Artifact name uses the slash-free `env.matrix_folders`; the path uses
      # the original `matrix.folders`, matching the `--make-reports` value.
      - name: "Test suite reports artifacts: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports"
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: ${{ matrix.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports
          path: /transformers/reports/${{ matrix.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports

  # Always runs, even when earlier jobs failed or were skipped: downloads the
  # uploaded artifacts and calls `utils/notification_service.py`, which is
  # given the Slack credentials and the upstream job results.
  send_results:
    name: Send results to webhook
    runs-on: ubuntu-22.04
    if: always()
    needs:
      - check_runner_status
      - check_runners
      - setup_gpu
      - run_models_gpu
    env:
      # Same branch / SHA candidates as in `setup_gpu`.
      CI_BRANCH_PUSH: ${{ github.event.ref }}
      CI_BRANCH_WORKFLOW_RUN: ${{ github.event.workflow_run.head_branch }}
      CI_SHA_PUSH: ${{ github.event.head_commit.id }}
      CI_SHA_WORKFLOW_RUN: ${{ github.event.workflow_run.head_sha }}
    steps:
      - name: Preliminary job status
        shell: bash
        run: |
          echo "Runner availability: ${{ needs.check_runner_status.result }}"
          echo "Setup status: ${{ needs.setup_gpu.result }}"
          echo "Runner status: ${{ needs.check_runners.result }}"

      # Exports `CI_BRANCH` and `CI_SHA` to the following steps via GITHUB_ENV.
      - name: Prepare custom environment variables
        shell: bash
        run: |
          # Strip the `refs/heads/` prefix so only the branch name remains.
          CI_BRANCH_PUSH=${CI_BRANCH_PUSH/'refs/heads/'/''}
          echo "$CI_BRANCH_PUSH"
          echo "$CI_BRANCH_WORKFLOW_RUN"
          echo "$CI_SHA_PUSH"
          echo "$CI_SHA_WORKFLOW_RUN"
          if [[ -n "$CI_BRANCH_PUSH" ]]; then
            echo "CI_BRANCH=$CI_BRANCH_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_BRANCH=$CI_BRANCH_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi
          if [[ -n "$CI_SHA_PUSH" ]]; then
            echo "CI_SHA=$CI_SHA_PUSH" >> "$GITHUB_ENV"
          else
            echo "CI_SHA=$CI_SHA_WORKFLOW_RUN" >> "$GITHUB_ENV"
          fi

      # Read as shell variables so the values are never parsed as shell code.
      - name: print environment variables
        run: |
          echo "env.CI_BRANCH = $CI_BRANCH"
          echo "env.CI_SHA = $CI_SHA"

      - uses: actions/checkout@v4
        with:
          fetch-depth: 20

      - name: Update clone using environment variables
        run: |
          echo "original branch = $(git branch --show-current)"
          # Expands to nothing when the variable is empty, so an empty value
          # still results in a plain `git checkout`.
          git fetch && git checkout ${CI_BRANCH:+"$CI_BRANCH"}
          echo "updated branch = $(git branch --show-current)"
          git checkout ${CI_SHA:+"$CI_SHA"}
          echo "log = $(git log -n 1)"

      - uses: actions/download-artifact@v4

      - name: Send message to Slack
        env:
          CI_SLACK_BOT_TOKEN: ${{ secrets.CI_SLACK_BOT_TOKEN }}
          CI_SLACK_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID }}
          CI_SLACK_CHANNEL_ID_DAILY: ${{ secrets.CI_SLACK_CHANNEL_ID_DAILY }}
          CI_SLACK_CHANNEL_ID_AMD: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          CI_SLACK_CHANNEL_DUMMY_TESTS: ${{ secrets.CI_SLACK_CHANNEL_DUMMY_TESTS }}
          # The report channel is set from the AMD channel secret.
          CI_SLACK_REPORT_CHANNEL_ID: ${{ secrets.CI_SLACK_CHANNEL_ID_AMD }}
          ACCESS_REPO_INFO_TOKEN: ${{ secrets.ACCESS_REPO_INFO_TOKEN }}
          CI_EVENT: Push CI (AMD) - ${{ inputs.gpu_flavor }}
          CI_TITLE_PUSH: ${{ github.event.head_commit.message }}
          CI_TITLE_WORKFLOW_RUN: ${{ github.event.workflow_run.head_commit.message }}
          CI_SHA: ${{ env.CI_SHA }}
          RUNNER_STATUS: ${{ needs.check_runner_status.result }}
          RUNNER_ENV_STATUS: ${{ needs.check_runners.result }}
          SETUP_STATUS: ${{ needs.setup_gpu.result }}
        # The `matrix` output is passed as the single positional argument.
        # It stays in the Python-literal form produced by `setup_gpu`, whose
        # single-quoted strings fit inside the double-quoted shell argument.
        run: |
          pip install huggingface_hub
          pip install slack_sdk
          pip show slack_sdk
          python utils/notification_service.py "${{ needs.setup_gpu.outputs.matrix }}"