iblfe committed
Commit b585c7f
1 Parent(s): 23490d1

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .dockerignore +17 -0
  2. .env +16 -0
  3. .gitattributes +4 -0
  4. .github/workflows/python-package-publish.yml +57 -0
  5. .github/workflows/snyk-scan.yml +76 -0
  6. .gitignore +44 -0
  7. .ipynb_checkpoints/generate-checkpoint.py +16 -0
  8. .ipynb_checkpoints/requirements-checkpoint.txt +77 -0
  9. Dockerfile +35 -0
  10. LICENSE +201 -0
  11. Makefile +116 -0
  12. README.md +313 -8
  13. auth.json.lock +0 -0
  14. benchmarks/llm_gpu_benchmark.py +123 -0
  15. benchmarks/llm_gpu_benchmark_text-generation-inference.html +7 -0
  16. benchmarks/llm_gpu_benchmark_transformers.html +7 -0
  17. benchmarks/llm_gpu_benchmarks.json +2790 -0
  18. benchmarks/perf.json +136 -0
  19. benchmarks/perf.md +200 -0
  20. blog/README.md +81 -0
  21. ci/jenkinsfile +158 -0
  22. client/.gitignore +168 -0
  23. client/Makefile +58 -0
  24. client/README.md +107 -0
  25. client/h2ogpt_client/__init__.py +4 -0
  26. client/h2ogpt_client/_completion.py +507 -0
  27. client/h2ogpt_client/_core.py +50 -0
  28. client/h2ogpt_client/_gradio_client.py +54 -0
  29. client/h2ogpt_client/_models.py +35 -0
  30. client/h2ogpt_client/_server.py +18 -0
  31. client/poetry.lock +856 -0
  32. client/poetry.toml +1 -0
  33. client/pyproject.toml +41 -0
  34. client/tests/__init__.py +0 -0
  35. client/tests/conftest.py +57 -0
  36. client/tests/test_client.py +156 -0
  37. cloud/packer/Jenkinsfile +80 -0
  38. cloud/packer/README.md +22 -0
  39. cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh +11 -0
  40. cloud/packer/h2ogpt-azure.json +123 -0
  41. cloud/packer/h2ogpt-gcp.json +107 -0
  42. cloud/packer/install_h2ogpt.sh +19 -0
  43. cloud/packer/setup_environment.sh +46 -0
  44. cloud/packer/startup-scripts/h2ogpt.service +12 -0
  45. cloud/packer/startup-scripts/h2ogpt_nginx.service +12 -0
  46. cloud/packer/startup-scripts/run_h2ogpt.sh +26 -0
  47. cloud/packer/startup-scripts/run_nginx.sh +23 -0
  48. cloud/packer/startup-scripts/run_vllm.sh +10 -0
  49. cloud/packer/startup-scripts/temp.conf +14 -0
  50. cloud/packer/startup-scripts/vllm.service +12 -0
.dockerignore ADDED
@@ -0,0 +1,17 @@
+ .git
+ .npm
+ .dockerignore
+ .pytest_cache
+ .cache
+ .local
+ .github
+ .nv
+ .benchmarks
+ .bash_history
+ .gitignore
+ h2ogpt.egg-info
+ venv
+ build
+ dist
+ prebuilt_deps
+ Dockerfile
.env ADDED
@@ -0,0 +1,16 @@
+
+ # H2OGPT
+
+ H2OGPT_PORT=7860
+ H2OGPT_BASE_MODEL=h2oai/h2ogpt-4096-llama2-7b-chat
+ H2OGPT_ARGS="/workspace/generate.py --base_model=${H2OGPT_BASE_MODEL} --use_safetensors=True --prompt_type=llama2 --save_dir=/workspace/save/ --use_gpu_id=False --score_model=None --max_max_new_tokens=2048 --max_new_tokens=1024"
+
+ # VLLM
+
+ VLLM_TOKENIZER=hf-internal-testing/llama-tokenizer
+ H2OGPT_VLLM_ARGS="--model=${H2OGPT_BASE_MODEL} --tokenizer=${VLLM_TOKENIZER} --tensor-parallel-size=2 --seed=1234 --trust-remote-code --download-dir=/workspace/.cache/huggingface/hub"
+
+ # CPU models
+
+ MODEL_PATH_LLAMA=https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf
+ H2OGPT_CPU_ARGS="/workspace/generate.py --base_model=llama --model_path_llama=${MODEL_PATH_LLAMA} --max_seq_len=4096"
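These are plain shell-style variables. As a minimal sketch of consuming them from Python rather than a shell, assuming the `python-dotenv` package pinned later in this commit's requirements (the local `.env` path and the final printout are illustrative only, not part of the commit):

```python
# Sketch: load the .env above and split H2OGPT_ARGS into an argv list.
# Assumes python-dotenv (pinned as python-dotenv==1.0.0 in requirements);
# its default interpolation expands ${VAR} references such as
# ${H2OGPT_BASE_MODEL} the same way a shell would.
import os
import shlex

from dotenv import load_dotenv

load_dotenv(".env")  # populates os.environ from the file

port = os.environ["H2OGPT_PORT"]
argv = shlex.split(os.environ["H2OGPT_ARGS"])
print(f"python {' '.join(argv)}  # would serve on port {port}")
```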
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data/demo.png filter=lfs diff=lfs merge=lfs -text
+ docs/aws_sagemaker_endpoint_setup.pdf filter=lfs diff=lfs merge=lfs -text
+ tests/CityofTshwaneWater.pdf filter=lfs diff=lfs merge=lfs -text
+ tests/ocr2.png filter=lfs diff=lfs merge=lfs -text
.github/workflows/python-package-publish.yml ADDED
@@ -0,0 +1,57 @@
+ name: Build & Publish h2oGPT Python wheel to PYPI
+
+ on:
+   workflow_dispatch:
+     inputs:
+       pypi-index:
+         type: choice
+         description: PyPI index to publish to
+         required: true
+         default: Test-PyPI
+         options:
+           - PyPI
+           - Test-PyPI
+       version:
+         description: |
+           Override the current version of the Python package for dev purposes when uploading to Test-PyPI
+         type: string
+
+ jobs:
+   build_and_upload:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v3.5.3
+
+       - uses: actions/setup-python@v4
+         with:
+           python-version: '3.10'
+
+       - name: Install Dependencies
+         run: |
+           python3.10 -m pip install --upgrade pip
+           python3.10 -m pip install setuptools wheel twine --upgrade
+
+       - name: Modify Version
+         if: ${{ inputs.version != '' }}
+         run: |
+           echo ${{ inputs.version }} > version.txt
+           echo "h2ogpt-wheel-version = $(cat version.txt)"
+
+       - name: Build Wheel
+         run: make clean dist
+
+       - name: Publish to Test-PyPI
+         if: ${{ inputs.pypi-index == 'Test-PyPI' }}
+         run: |
+           twine upload -r testpypi dist/*
+         env:
+           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
+
+       - name: Publish to PyPI
+         if: ${{ inputs.pypi-index == 'PyPI' }}
+         run: |
+           twine upload dist/*
+         env:
+           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
+           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
.github/workflows/snyk-scan.yml ADDED
@@ -0,0 +1,76 @@
+ name: Snyk Security Vulnerability Scan
+
+ on:
+   workflow_dispatch:
+   pull_request:
+   push:
+     tags:
+       - 'v[0-9]+.[0-9]+.[0-9]+'
+     branches:
+       - main
+
+ jobs:
+   snyk_scan_test:
+     if: ${{ github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }}
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@master
+       - uses: snyk/actions/setup@master
+
+       - uses: actions/setup-python@v4
+         with:
+           python-version: '3.10'
+
+       - name: Check changed Deps files
+         uses: tj-actions/changed-files@v35
+         id: changed-files
+         with:
+           files: | # This will match all the files with below patterns
+             requirements.txt
+
+       - name: Scan python dependencies
+         if: contains(steps.changed-files.outputs.all_changed_and_modified_files, 'requirements.txt')
+         env:
+           SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+         run: |
+           head -n 41 requirements.txt > temp-requirements.txt # remove test deps
+           python3.10 -m pip install -r temp-requirements.txt
+           snyk test \
+             -d \
+             --file=temp-requirements.txt \
+             --package-manager=pip \
+             --command=python3.10 \
+             --skip-unresolved \
+             --severity-threshold=high
+
+   snyk_scan_monitor:
+     if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@master
+       - uses: snyk/actions/setup@master
+
+       - uses: actions/setup-python@v4
+         with:
+           python-version: '3.10'
+
+       - name: Extract github branch/tag name
+         shell: bash
+         run: echo "ref=$(echo ${GITHUB_REF##*/})" >> $GITHUB_OUTPUT
+         id: extract_ref
+
+       - name: Monitor python dependencies
+         env:
+           SNYK_TOKEN: '${{ secrets.SNYK_TOKEN }}'
+         run: |
+           head -n 41 requirements.txt > temp-requirements.txt # remove test deps
+           python3.10 -m pip install -r temp-requirements.txt
+           snyk monitor \
+             -d \
+             --file=temp-requirements.txt \
+             --command=python3.10 \
+             --package-manager=pip \
+             --skip-unresolved \
+             --remote-repo-url=h2ogpt/${{ steps.extract_ref.outputs.ref }} \
+             --org=h2o-gpt \
+             --project-name=H2O-GPT/h2ogpt/${{ steps.extract_ref.outputs.ref }}/requirements.txt
.gitignore ADDED
@@ -0,0 +1,44 @@
+ out/
+ 7B/
+ 13B/
+ __pycache__/
+ checkpoint**
+ minimal-llama**
+ upload.py
+ lora-**
+ *ckpt
+ wandb
+ evaluate.py
+ test_data.json
+ todo.txt
+ .neptune/
+ *.bin
+ db_dir_UserData
+ temp_path_do_doc1
+ offline_folder
+ flagged_data_points
+ .pytest_cache
+ user_path
+ user_path_test
+ build
+ h2ogpt.egg-info
+ dist
+ .idea
+ .cache
+ .local
+ .bash_history
+ .benchmarks
+ Dockerfile-runner.dockerfile
+ build_info.txt
+ prebuilt_deps
+ Dockerfile_deps
+
+ # IDEs
+ .idea/
+
+ # virtual envs
+ venv
+
+ # Mac one click installer
+ Tesseract-OCR/
+ poppler/
.ipynb_checkpoints/generate-checkpoint.py ADDED
@@ -0,0 +1,16 @@
+ import os
+ import sys
+
+ if os.path.dirname(os.path.abspath(__file__)) not in sys.path:
+     sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ from src.gen import main
+ from src.utils import H2O_Fire
+
+
+ def entrypoint_main():
+     H2O_Fire(main)
+
+
+ if __name__ == "__main__":
+     entrypoint_main()
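`H2O_Fire` comes from `src.utils` and its definition is not shown in this view; given the README's `python generate.py --base_model=...` usage, it appears to follow the python-fire pattern of exposing a function's keyword arguments as CLI flags. A minimal sketch of that pattern with plain `fire` (pinned as fire==0.5.0 in the requirements below); the `serve` function is a hypothetical stand-in, not the real `src.gen.main`:

```python
# fire turns keyword arguments into --flags, e.g.:
#   python serve_sketch.py --base_model=llama --max_seq_len=2048
# `serve` here is illustrative only.
import fire


def serve(base_model: str = "h2oai/h2ogpt-4096-llama2-7b-chat",
          prompt_type: str = "llama2",
          max_seq_len: int = 4096) -> None:
    print(f"loading {base_model} (prompt_type={prompt_type}, max_seq_len={max_seq_len})")


if __name__ == "__main__":
    fire.Fire(serve)
```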
.ipynb_checkpoints/requirements-checkpoint.txt ADDED
@@ -0,0 +1,77 @@
+ # for generate (gradio server) and finetune
+ datasets==2.13.0
+ sentencepiece==0.1.99
+ gradio==3.50.2
+ sse_starlette==1.8.2
+ huggingface_hub==0.19.4
+ appdirs==1.4.4
+ fire==0.5.0
+ docutils==0.20.1
+ torch==2.1.2; sys_platform != "darwin" and platform_machine != "arm64"
+ torch==2.1.2; sys_platform == "darwin" and platform_machine == "arm64"
+ evaluate==0.4.0
+ rouge_score==0.1.2
+ sacrebleu==2.3.1
+ scikit-learn==1.2.2
+ # optional (need to uncomment code in gradio_runner.py for import of better_profanity)
+ # alt-profanity-check==1.2.2
+ # better-profanity==0.7.0
+ numpy==1.24.3
+ pandas==2.0.2
+ matplotlib==3.7.1
+ loralib==0.1.1
+ bitsandbytes==0.41.3
+ accelerate==0.25.0
+ peft==0.7.1
+ transformers==4.36.2
+ tokenizers==0.15.0
+ APScheduler==3.10.1
+
+ # optional for generate
+ pynvml==11.5.0
+ psutil==5.9.5
+ boto3==1.26.101
+ botocore==1.29.101
+
+ # optional for finetune
+ tensorboard==2.13.0
+ neptune==1.2.0
+
+ # for gradio client
+ gradio_client==0.6.1
+ beautifulsoup4==4.12.2
+ markdown==3.4.3
+
+ # data and testing
+ pytest==7.2.2
+ pytest-xdist==3.2.1
+ nltk==3.8.1
+ textstat==0.7.3
+ # pandoc==2.3
+ pypandoc==1.11; sys_platform == "darwin" and platform_machine == "arm64"
+ pypandoc_binary==1.11; platform_machine == "x86_64"
+ pypandoc_binary==1.11; platform_system == "Windows"
+ python-magic-bin==0.4.14; platform_system == "Windows"
+ openpyxl==3.1.2
+ lm_dataformat==0.0.20
+ bioc==2.0
+
+ # falcon
+ einops==0.6.1
+ instructorembedding==1.0.1
+
+ # for gpt4all .env file, but avoid worrying about imports
+ python-dotenv==1.0.0
+
+ text-generation==0.6.1
+ # for tokenization when don't have HF tokenizer
+ tiktoken==0.5.2
+
+ requests>=2.31.0
+ httpx==0.24.1
+ urllib3>=1.26.16
+ filelock>=3.12.2
+ joblib>=1.3.1
+ tqdm>=4.65.0
+ tabulate>=0.9.0
+ packaging>=23.1
Dockerfile ADDED
@@ -0,0 +1,35 @@
+ # devel needed for bitsandbytes requirement of libcudart.so, otherwise runtime sufficient
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
+
+ ENV DEBIAN_FRONTEND=noninteractive
+
+ ENV PATH="/h2ogpt_conda/bin:${PATH}"
+ ARG PATH="/h2ogpt_conda/bin:${PATH}"
+
+ ENV HOME=/workspace
+ ENV CUDA_HOME=/usr/local/cuda-11.8
+ ENV VLLM_CACHE=/workspace/.vllm_cache
+ ENV TIKTOKEN_CACHE_DIR=/workspace/tiktoken_cache
+
+ WORKDIR /workspace
+
+ COPY . /workspace/
+
+ RUN cd /workspace && ./docker_build_script_ubuntu.sh
+
+ RUN chmod -R a+rwx /workspace
+
+ ARG user=h2ogpt
+ ARG group=h2ogpt
+ ARG uid=1000
+ ARG gid=1000
+
+ RUN groupadd -g ${gid} ${group} && useradd -u ${uid} -g ${group} -s /bin/bash ${user}
+
+ EXPOSE 8888
+ EXPOSE 7860
+ EXPOSE 5000
+
+ USER h2ogpt
+
+ ENTRYPOINT ["python3.10"]
LICENSE ADDED
@@ -0,0 +1,201 @@
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+
+    Copyright 2023 Rohan Taori, Ishaan Gulrajani, Tianyi Zhang, Yann Dubois, Xuechen Li
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
Makefile ADDED
@@ -0,0 +1,116 @@
+ all: clean dist
+
+ PACKAGE_VERSION := `cat version.txt | tr -d '\n'`
+ BUILD_TAG := $(shell git describe --always --dirty)
+ DOCKER_TEST_IMAGE := harbor.h2o.ai/h2ogpt/test-image:$(BUILD_TAG)
+ PYTHON_BINARY ?= `which python`
+ DEFAULT_MARKERS ?= "not need_tokens and not need_gpu"
+
+ .PHONY: venv dist test publish docker_build build_info.txt
+
+ clean:
+ 	rm -rf dist build h2ogpt.egg-info
+
+ venv:
+ 	$(PYTHON_BINARY) -m virtualenv -p $(PYTHON_BINARY) venv
+
+ install:
+ 	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl
+
+ install-%:
+ 	$(PYTHON_BINARY) -m pip install dist/h2ogpt-$(PACKAGE_VERSION)-py3-none-any.whl[$*]
+
+ dist:
+ 	$(PYTHON_BINARY) setup.py bdist_wheel
+
+ test:
+ 	$(PYTHON_BINARY) -m pip install requirements-parser
+ 	$(PYTHON_BINARY) -m pytest tests --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+
+ test_imports:
+ 	$(PYTHON_BINARY) -m pytest tests/test_imports.py --disable-warnings --junit-xml=test_report.xml -m "$(DEFAULT_MARKERS)"
+
+ publish:
+ 	echo "Publishing not implemented yet."
+
+ build_info.txt:
+ 	@rm -rf build_info.txt
+ 	@echo "commit=\"$(shell git rev-parse HEAD)\"" >> $@
+ 	@echo "branch=\"`git rev-parse HEAD | git branch -a --contains | grep -v detached | sed -e 's~remotes/origin/~~g' -e 's~^ *~~' | sort | uniq | tr '*\n' ' '`\"" >> $@
+ 	@echo "describe=\"`git describe --always --dirty`\"" >> $@
+ 	@echo "build_os=\"`uname -a`\"" >> $@
+ 	@echo "build_machine=\"`hostname`\"" >> $@
+ 	@echo "build_date=\"$(shell date "+%Y%m%d")\"" >> $@
+ 	@echo "build_user=\"`id -u -n`\"" >> $@
+ 	@echo "base_version=\"$(PACKAGE_VERSION)\"" >> $@
+
+ git_hash.txt:
+ 	@echo "$(shell git rev-parse HEAD)" >> $@
+
+ # Deprecated for now, no 0.4.1 on pypi, use release binary wheel that has no CUDA errors anymore
+ docker_build_deps:
+ 	@cp docker_build_script_ubuntu.sh docker_build_script_ubuntu.sh.back
+ 	@sed -i '/# Install prebuilt dependencies/,$$d' docker_build_script_ubuntu.sh
+ 	@docker build -t h2ogpt-deps-builder -f Dockerfile .
+ 	@mv docker_build_script_ubuntu.sh.back docker_build_script_ubuntu.sh
+ 	@mkdir -p prebuilt_deps
+ 	@docker run \
+ 		--rm \
+ 		-it \
+ 		--entrypoint bash \
+ 		--runtime nvidia \
+ 		-v `pwd`:/dot \
+ 		-v /etc/passwd:/etc/passwd:ro \
+ 		-v /etc/group:/etc/group:ro \
+ 		-u `id -u`:`id -g` \
+ 		h2ogpt-deps-builder -c " \
+ 		mkdir -p /dot/prebuilt_deps && cd /dot/prebuilt_deps && \
+ 		GITHUB_ACTIONS=true python3.10 -m pip install auto-gptq==0.4.2 --no-cache-dir --use-deprecated=legacy-resolver && \
+ 		python3.10 -m pip wheel auto-gptq==0.4.2 \
+ 		"
+ 	@docker run \
+ 		--rm \
+ 		-it \
+ 		--entrypoint bash \
+ 		-v `pwd`:/dot \
+ 		quay.io/pypa/manylinux2014_x86_64 -c " \
+ 		ln -s /usr/local/bin/python3.10 /usr/local/bin/python3 && cd /tmp && \
+ 		git clone https://github.com/h2oai/duckdb.git && \
+ 		cd duckdb && \
+ 		git checkout dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad && \
+ 		BUILD_PYTHON=1 make release && \
+ 		cd tools/pythonpkg && \
+ 		python3.10 setup.py bdist_wheel && \
+ 		cp dist/duckdb-0.*.whl /dot/prebuilt_deps \
+ 		"
+ 	s3cmd put prebuilt_deps/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+ 	s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/auto_gptq-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl --acl-public
+ 	s3cmd put prebuilt_deps/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl s3://artifacts.h2o.ai/deps/h2ogpt/ && \
+ 	s3cmd setacl s3://artifacts.h2o.ai/deps/h2ogpt/duckdb-0.8.2.dev4026+gdcd8c1ffc5-cp310-cp310-linux_x86_64.whl --acl-public
+
+ docker_build: build_info.txt
+ ifeq ($(shell curl --connect-timeout 4 --write-out %{http_code} -sS --output /dev/null -X GET http://harbor.h2o.ai/api/v2.0/projects/h2ogpt/repositories/test-image/artifacts/$(BUILD_TAG)/tags),200)
+ 	@echo "Image already pushed to Harbor: $(DOCKER_TEST_IMAGE)"
+ else
+ 	DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+ 	docker push $(DOCKER_TEST_IMAGE)
+ endif
+
+ just_docker_build: build_info.txt
+ 	DOCKER_BUILDKIT=1 docker build -t $(DOCKER_TEST_IMAGE) -f Dockerfile .
+
+ docker_build_runner: docker_build
+ 	-docker pull $(DOCKER_TEST_IMAGE)
+ 	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+ 	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+ 	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+ 	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(BUILD_TAG)
+ 	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)
+ 	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:latest
+ ifdef BUILD_ID
+ 	docker tag $(DOCKER_TEST_IMAGE) gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+ 	docker push gcr.io/vorvan/h2oai/h2ogpt-runtime:$(PACKAGE_VERSION)-$(BUILD_ID)
+ endif
+
+ print-%:
+ 	@echo $($*)
README.md CHANGED
@@ -1,12 +1,317 @@
  ---
- title: Test
- emoji:
- colorFrom: yellow
- colorTo: yellow
+ title: test
+ app_file: generate.py
  sdk: gradio
- sdk_version: 4.15.0
- app_file: app.py
- pinned: false
+ sdk_version: 3.50.2
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # h2oGPT
+
+ Turn ★ into ⭐ (top-right corner) if you like the project!
+
+ Query and summarize your documents or just chat with local private GPT LLMs using h2oGPT, an Apache V2 open-source project.
+
+ - **Private** offline database of any documents [(PDFs, Excel, Word, Images, Video Frames, YouTube, Audio, Code, Text, MarkDown, etc.)](docs/README_LangChain.md#supported-datatypes)
+ - **Persistent** database (Chroma, Weaviate, or in-memory FAISS) using accurate embeddings (instructor-large, all-MiniLM-L6-v2, etc.)
+ - **Efficient** use of context using instruct-tuned LLMs (no need for LangChain's few-shot approach)
+ - **Parallel** summarization and extraction, reaching an output of 80 tokens per second with the 13B LLaMa2 model
+ - **HYDE** (Hypothetical Document Embeddings) for enhanced retrieval based upon LLM responses
+ - **Variety** of models supported (LLaMa2, Mistral, Falcon, Vicuna, WizardLM; with AutoGPTQ, 4-bit/8-bit, LORA, etc.)
+ - **GPU** support from HF and LLaMa.cpp GGML models, and **CPU** support using HF, LLaMa.cpp, and GPT4ALL models
+ - **Attention Sinks** for [arbitrarily long](https://github.com/tomaarsen/attention_sinks) generation (LLaMa-2, Mistral, MPT, Pythia, Falcon, etc.)
+ - **UI** or CLI with streaming of all models
+ - **Upload** and **View** documents through the UI (control multiple collaborative or personal collections)
+ - **Vision LLaVa** model and **Stable Diffusion** image generation
+ - **Voice STT** using Whisper with streaming audio conversion
+ - **Voice TTS** using MIT-licensed Microsoft Speech T5 with multiple voices and streaming audio conversion
+ - **Voice TTS** using MPL2-licensed TTS including voice cloning and streaming audio conversion
+ - **AI Assistant Voice Control Mode** for hands-free control of h2oGPT chat
+ - **Bake-off** UI mode against many models at the same time
+ - **Easy Download** of model artifacts and control over models like LLaMa.cpp through the UI
+ - **Authentication** in the UI by user/password
+ - **State Preservation** in the UI by user/password
+ - **Linux, Docker, macOS, and Windows** support
+ - [**Easy Windows Installer**](#windows-1011-64-bit-with-full-document-qa-capability) for Windows 10 64-bit (CPU/CUDA)
+ - [**Easy macOS Installer**](#macos-cpum1m2-with-full-document-qa-capability) for macOS (CPU/M1/M2)
+ - **Inference Servers** support (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI, Anthropic)
+ - **OpenAI-compliant**
+   - Server Proxy API (h2oGPT acts as a drop-in replacement to the OpenAI server)
+   - Python client API (to talk to the Gradio server)
+ - **Web-Search** integration with Chat and Document Q/A
+ - **Agents** for Search, Document Q/A, Python Code, CSV frames (experimental; currently best with OpenAI)
+ - **Evaluate** performance using reward models
+ - **Quality** maintained with over 1000 unit and integration tests taking over 4 GPU-hours
+
+ ### Get Started
+
+ [![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-docker?style=flat-square)](https://raw.githubusercontent.com/h2oai/h2ogpt/main/LICENSE)
+ [![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_LINUX.md)
+ [![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_MACOS.md)
+ [![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_WINDOWS.md)
+ [![Docker](https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white)](https://github.com/h2oai/h2ogpt/blob/main/docs/README_DOCKER.md)
+
+ To quickly try out h2oGPT with limited document Q/A capability, create a fresh Python 3.10 environment and run:
+ * CPU or MAC (M1/M2):
+   ```bash
+   # for windows/mac use "set" or relevant environment setting mechanism
+   export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
+   ```
+ * Linux/Windows CUDA:
+   ```bash
+   # for windows/mac use "set" or relevant environment setting mechanism
+   export PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cu118"
+   ```
+ Then run the following commands on any system:
+ ```bash
+ git clone https://github.com/h2oai/h2ogpt.git
+ cd h2ogpt
+ pip install -r requirements.txt
+ pip install -r reqs_optional/requirements_optional_langchain.txt
+ pip install -r reqs_optional/requirements_optional_gpt4all.txt
+ pip install -r reqs_optional/requirements_optional_langchain.urls.txt
+ # GPL, only run next line if that is ok:
+ # pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt
+
+ python generate.py --base_model=TheBloke/zephyr-7B-beta-GGUF --prompt_type=zephyr --max_seq_len=4096
+ ```
+ Next, go to your browser by visiting [http://127.0.0.1:7860](http://127.0.0.1:7860) or [http://localhost:7860](http://localhost:7860). Choose 13B for a better model than 7B.
+ If you encounter issues with `llama-cpp-python` or other packages that try to compile and fail, try binary wheels for your platform as linked in the detailed instructions below. For AVX1 or AMD ROC systems, edit `reqs_optional/requirements_optional_gpt4all.txt` to choose valid packages.
+
+ We recommend quantized models for most small-GPU systems, e.g. [LLaMa-2-7B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q6_K.gguf) for 9GB+ GPU memory or larger models like [LLaMa-2-13B-Chat-GGUF](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q6_K.gguf) if you have 16GB+ GPU memory.
+
+ ---
+
+ Note that for all platforms, some packages such as DocTR, Unstructured, BLIP, Stable Diffusion, etc. download models at runtime that appear to delay operations in the UI. The progress appears in the console logs.
+
+ #### Windows 10/11 64-bit with full document Q/A capability
+ * One-Click Installer
+   * CPU or GPU: Download [h2oGPT Windows Installer](https://h2o-release.s3.amazonaws.com/h2ogpt/Jan2024/h2oGPT_0.0.1.exe) (1.3GB file)
+     * Once installed, feel free to change the start directory for the icon from `%HOMEDRIVE%\%HOMEPATH%` to (e.g.) `%HOMEDRIVE%\%HOMEPATH%\h2ogpt_data` so all created files (like the database) go there. All saved paths are relative to this path.
+   * CPU: Click the h2oGPT icon in the Start menu. Give it about 15 seconds to open in a browser if many optional packages are included. By default, the browser will launch with the actual local IP address, not localhost.
+   * GPU: Before starting, run the following commands (replace `pseud` with your user):
+     ```
+     C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip uninstall -y torch
+     C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe -m pip install https://h2o-release.s3.amazonaws.com/h2ogpt/torch-2.1.2%2Bcu118-cp310-cp310-win_amd64.whl
+     ```
+     Now click the h2oGPT icon in the Start menu. Give it about 20 seconds to open in a browser if many optional packages are included. By default, the browser will launch with the actual local IP address, not localhost.
+   * To debug any issues, run the following (replace `pseud` with your user):
+     ```
+     C:\Users\pseud\AppData\Local\Programs\h2oGPT\Python\python.exe "C:\Users\pseud\AppData\Local\Programs\h2oGPT\h2oGPT.launch.pyw"
+     ```
+     Any start-up exceptions are appended to the log, e.g. `C:\Users\pseud\h2ogpt_exception.log`.
+   * To control startup, tweak the Python startup file, e.g. for user `pseud`: `C:\Users\pseud\AppData\Local\Programs\h2oGPT\pkgs\win_run_app.py`
+     * In this Python code, set ENVs anywhere before main_h2ogpt() is called
+       * E.g. `os.environ['name'] = 'value'`, e.g. `os.environ['n_jobs'] = '10'` (must always be a string).
+     * Environment variables can be changed, e.g.:
+       * `n_jobs`: number of cores for various tasks
+       * `OMP_NUM_THREADS`: thread count for LLaMa
+       * `CUDA_VISIBLE_DEVICES`: which GPUs are used. Recommended to be set to a single fast GPU, e.g. `CUDA_VISIBLE_DEVICES=0` if you have multiple GPUs. Note that the UI cannot control which GPUs (or CPU mode) LLaMa models use.
+       * Any CLI argument from `python generate.py --help` with an environment variable set as `h2ogpt_x`, e.g. `h2ogpt_h2ocolors` set to `False`.
+       * Set env `h2ogpt_server_name` to the actual IP address so the app is visible on the LAN, e.g. `h2ogpt_server_name` to `192.168.1.172`, and allow access through the firewall if Windows Defender is activated.
+   * One can tweak the installed h2oGPT code at, e.g., `C:\Users\pseud\AppData\Local\Programs\h2oGPT`.
+   * To terminate the app, go to the System tab, click Admin, and click Shutdown h2oGPT.
+     * If startup fails, run as console and check for errors; also kill any old Python processes.
+
+ * [Full Windows 10/11 Manual Installation Script](docs/README_WINDOWS.md)
+   * Single `.bat` file for installation (if you do not skip any optional packages, takes about 9GB filled on disk).
+   * A base Conda env is recommended, since it allows for DocTR; DocTR requires pygobject, which otherwise has no Windows support (except `msys2`, which h2oGPT cannot use).
+   * Also allows for the TTS package by Coqui, which is otherwise not currently enabled in the one-click installer.
+
+ ---
+
+ #### Linux (CPU/CUDA) with full document Q/A capability
+ * [Docker Build and Run Docs](docs/README_DOCKER.md)
+ * [Linux Manual Install and Run Docs](docs/README_LINUX.md)
+
+ ---
+
+ #### macOS (CPU/M1/M2) with full document Q/A capability
+ * One-click Installers (Experimental and subject to changes)
+
+   Nov 08, 2023
+   - [h2ogpt-osx-m1-cpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-cpu)
+   - [h2ogpt-osx-m1-gpu](https://h2o-release.s3.amazonaws.com/h2ogpt/Nov2023/h2ogpt-osx-m1-gpu)
+
+   Download the runnable file and open it from the Finder. It will take a few minutes to unpack and run the application.
+   These one-click installers are experimental. Report any issues with steps to reproduce at https://github.com/h2oai/h2ogpt/issues.
+
+   **Note:** The app bundle is unsigned. If you experience any issues with running the app, run the following commands:
+   ```bash
+   $ xattr -dr com.apple.quarantine {file-path}/h2ogpt-osx-m1-gpu
+   $ chmod +x {file-path}/h2ogpt-osx-m1-gpu
+   ```
+ * [macOS Manual Install and Run Docs](docs/README_MACOS.md)
+
+ ---
+
+ #### Example Models
+ * [Highest accuracy and speed](https://huggingface.co/h2oai/h2ogpt-4096-llama2-70b-chat) on 16-bit with TGI/vLLM using ~48GB/GPU when in use (4xA100 for high concurrency, 2xA100 for low concurrency)
+ * [Middle-range accuracy](https://huggingface.co/h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v2) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (2xA100)
+ * [Small memory profile with ok accuracy](https://huggingface.co/TheBloke/Llama-2-13B-Chat-GGUF) 16GB GPU if full GPU offloading
+ * [Balanced accuracy and size](https://huggingface.co/h2oai/h2ogpt-4096-llama2-13b-chat) on 16-bit with TGI/vLLM using ~45GB/GPU when in use (1xA100)
+ * [Smallest or CPU friendly](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF) 32GB system RAM or 9GB GPU if full GPU offloading
+ * [Best for 4*A10G using g5.12xlarge](https://huggingface.co/TheBloke/Llama-2-70B-chat-AWQ) AWQ LLaMa 70B using 4*A10G using vLLM
+
+ **GPU** mode requires CUDA support via torch and transformers. A 7B/13B model in 16-bit uses 14GB/26GB of GPU memory to store the weights (2 bytes per weight). Compression such as 4-bit precision (bitsandbytes, AWQ, GPTQ, etc.) can further reduce memory requirements down to less than 6GB when asking a question about your documents. (For more information, see [low-memory mode](docs/FAQ.md#low-memory-mode).)
+
+ **CPU** mode uses GPT4ALL and LLaMa.cpp, e.g. gpt4all-j, requiring about 14GB of system RAM in typical use.
+
+ ---
+
+ ### Live Demos
+ - [![img-small.png](docs/img-small.png) Live h2oGPT Document Q/A Demo](https://gpt.h2o.ai/)
+ - [🤗 Live h2oGPT Chat Demo 1](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot)
+ - [🤗 Live h2oGPT Chat Demo 2](https://huggingface.co/spaces/h2oai/h2ogpt-chatbot2)
+ - [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT CPU](https://colab.research.google.com/drive/13RiBdAFZ6xqDwDKfW6BG_-tXfXiqPNQe?usp=sharing)
+ - [![](https://colab.research.google.com/assets/colab-badge.svg) h2oGPT GPU](https://colab.research.google.com/drive/143-KFHs2iCqXTQLI2pFCDiR69z0dR8iE?usp=sharing)
+
+ ### Inference Benchmarks for Summarization & Generation
+
+ * [Benchmark results for Llama2](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.md)
+ * [pytest to create benchmark results](https://github.com/h2oai/h2ogpt/blob/main/tests/test_perf_benchmarks.py)
+ * [Raw benchmark results (JSON)](https://github.com/h2oai/h2ogpt/blob/main/benchmarks/perf.json)
+
+ ### Resources
+ - [Discord](https://discord.gg/WKhYMWcVbq)
+ - [Models (LLaMa-2, Falcon 40, etc.) at 🤗](https://huggingface.co/h2oai/)
+ - [YouTube: 100% Offline ChatGPT Alternative?](https://www.youtube.com/watch?v=Coj72EzmX20)
+ - [YouTube: Ultimate Open-Source LLM Showdown (6 Models Tested) - Surprising Results!](https://www.youtube.com/watch?v=FTm5C_vV_EY)
+ - [YouTube: Blazing Fast Falcon 40b 🚀 Uncensored, Open-Source, Fully Hosted, Chat With Your Docs](https://www.youtube.com/watch?v=H8Dx-iUY49s)
+ - [Technical Paper: https://arxiv.org/pdf/2306.08161.pdf](https://arxiv.org/pdf/2306.08161.pdf)
+
+ ### Partners
+
+ - [Live Leaderboard](https://evalgpt.ai/) for GPT-4 Elo Evaluation of Instruct/Chat models with [h2o-LLM-eval](https://github.com/h2oai/h2o-LLM-eval).
+ - Advanced fine-tuning with [H2O LLM Studio](https://github.com/h2oai/h2o-llmstudio)
+
+ ### Video Demo
+
+ https://github.com/h2oai/h2ogpt/assets/2249614/2f805035-2c85-42fb-807f-fd0bca79abc6
+
+ YouTube 4K version: https://www.youtube.com/watch?v=_iktbj4obAI
+
+ ### Docs Guide
+ <!-- cat README.md | ./gh-md-toc - But Help is heavily processed -->
+ * [Get Started](#get-started)
+   * [Linux (CPU or CUDA)](docs/README_LINUX.md)
+   * [macOS (CPU or M1/M2)](docs/README_MACOS.md)
+   * [Windows 10/11 (CPU or CUDA)](docs/README_WINDOWS.md)
+   * [GPU (CUDA, AutoGPTQ, exllama) Running Details](docs/README_GPU.md)
+   * [CPU Running Details](docs/README_CPU.md)
+   * [CLI chat](docs/README_CLI.md)
+   * [Gradio UI](docs/README_ui.md)
+   * [Client API (Gradio, OpenAI-Compliant)](docs/README_CLIENT.md)
+   * [Inference Servers (HF TGI server, vLLM, Gradio, ExLLaMa, Replicate, OpenAI, Azure OpenAI)](docs/README_InferenceServers.md)
+   * [Python Wheel](docs/README_WHEEL.md)
+   * [Offline Installation](docs/README_offline.md)
+   * [Low Memory](docs/FAQ.md#low-memory-mode)
+   * [Docker](docs/README_DOCKER.md)
+ * [LangChain Document Support](docs/README_LangChain.md)
+ * [Compare to PrivateGPT et al.](docs/README_LangChain.md#what-is-h2ogpts-langchain-integration-like)
+ * [Roadmap](#roadmap)
+ * [Development](#development)
+ * [Help](#help)
+   * [LangChain file types supported](docs/README_LangChain.md#supported-datatypes)
+   * [CLI Database control](docs/README_LangChain.md#database-creation)
+   * [FAQ](docs/FAQ.md)
+     * [Model Usage Notes](docs/FAQ.md#model-usage-notes)
+     * [Adding LLM Models (including using GGUF and Attention Sinks)](docs/FAQ.md#adding-models)
+     * [Adding Embedding Models](docs/FAQ.md#add-new-embedding-model)
+     * [Adding Prompts](docs/FAQ.md#adding-prompt-templates)
+     * [In-Context Learning](docs/FAQ.md#in-context-learning-via-prompt-engineering)
+     * [Multiple GPUs](docs/FAQ.md#multiple-gpus)
+     * [Low-Memory Usage](docs/FAQ.md#low-memory-mode)
+     * [Environment Variables](docs/FAQ.md#what-envs-can-i-pass-to-control-h2ogpt)
+     * [HTTPS access for server and client](docs/FAQ.md#https-access-for-server-and-client)
+   * [Useful Links](docs/LINKS.md)
+   * [Fine-Tuning](docs/FINETUNE.md)
+   * [Triton](docs/TRITON.md)
+   * [Commercial viability](docs/FAQ.md#commercial-viability)
+ * [Acknowledgements](#acknowledgements)
+ * [Why H2O.ai?](#why-h2oai)
+ * [Disclaimer](#disclaimer)
+
+ ### Experimental features
+
+ These are not part of the normal installation instructions and are experimental.
+
+ * [Agents](docs/README_Agents.md) -- in alpha testing. Works best with OpenAI, though even that sometimes fails.
+
+ ### Roadmap
+
+ - Integration of code and resulting LLMs with downstream applications and low/no-code platforms
+ - Complement h2oGPT chatbot with other APIs like [ToolBench](https://github.com/OpenBMB/ToolBench)
+ - Enhance the model's code completion, reasoning, and mathematical capabilities; ensure factual correctness, minimize hallucinations, and avoid repetitive output
+ - Add better agents for SQL and CSV question/answer
+
+ ### Development
+
+ - To create a development environment for training and generation, follow the [installation instructions](docs/INSTALL.md).
+ - To fine-tune any LLM models on your data, follow the [fine-tuning instructions](docs/FINETUNE.md).
+ - To run h2oGPT tests:
+   ```bash
+   pip install requirements-parser pytest-instafail pytest-random-order
+   pip install playsound==1.3.0
+   pytest --instafail -s -v tests
+   # for client tests
+   make -C client setup
+   make -C client build
+   pytest --instafail -s -v client/tests
+   # for openai server tests on an already-running local server
+   pytest -s -v -n 4 openai_server/test_openai_server.py::test_openai_client
+   ```
+   or tweak/run `tests/test4gpus.sh` to run tests in parallel.
+
+ ### Help
+
+ - [FAQs](docs/FAQ.md)
+
+ - [README for LangChain](docs/README_LangChain.md)
+
+ - Useful [links](docs/LINKS.md) for additional context and information on competitors, models, and datasets
+
+ ### Acknowledgements
+
+ * Some training code was based upon the March 24 version of [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/).
+ * Used high-quality data created by [OpenAssistant](https://open-assistant.io/).
+ * Used base models by [EleutherAI](https://www.eleuther.ai/).
+ * Used OIG data created by [LAION](https://laion.ai/blog/oig-dataset/).
+
+ ### Why H2O.ai?
+
+ Our [Makers](https://h2o.ai/company/team/) at [H2O.ai](https://h2o.ai) have built several world-class Machine Learning, Deep Learning and AI platforms:
+ - #1 open-source machine learning platform for the enterprise [H2O-3](https://github.com/h2oai/h2o-3)
+ - The world's best AutoML (Automatic Machine Learning) with [H2O Driverless AI](https://h2o.ai/platform/ai-cloud/make/h2o-driverless-ai/)
+ - No-Code Deep Learning with [H2O Hydrogen Torch](https://h2o.ai/platform/ai-cloud/make/hydrogen-torch/)
+ - Document Processing with Deep Learning in [Document AI](https://h2o.ai/platform/ai-cloud/make/document-ai/)
+
+ We also built platforms for deployment and monitoring, and for data wrangling and governance:
+ - [H2O MLOps](https://h2o.ai/platform/ai-cloud/operate/h2o-mlops/) to deploy and monitor models at scale
+ - [H2O Feature Store](https://h2o.ai/platform/ai-cloud/make/feature-store/) in collaboration with AT&T
+ - Open-source Low-Code AI App Development Frameworks [Wave](https://wave.h2o.ai/) and [Nitro](https://nitro.h2o.ai/)
+ - Open-source Python [datatable](https://github.com/h2oai/datatable/) (the engine for H2O Driverless AI feature engineering)
+
+ Many of our customers are creating models and deploying them enterprise-wide and at scale in the [H2O AI Cloud](https://h2o.ai/platform/ai-cloud/):
+ - Multi-Cloud or on Premises
+ - [Managed Cloud (SaaS)](https://h2o.ai/platform/ai-cloud/managed)
+ - [Hybrid Cloud](https://h2o.ai/platform/ai-cloud/hybrid)
+ - [AI Appstore](https://docs.h2o.ai/h2o-ai-cloud/)
+
+ We are proud to have over 25 (of the world's 280) [Kaggle Grandmasters](https://h2o.ai/company/team/kaggle-grandmasters/) call H2O home, including three Kaggle Grandmasters who have made it to world #1.
+
+ ### Disclaimer
+
+ Please read this disclaimer carefully before using the large language model provided in this repository. Your use of the model signifies your agreement to the following terms and conditions.
+
+ - Biases and Offensiveness: The large language model is trained on a diverse range of internet text data, which may contain biased, racist, offensive, or otherwise inappropriate content. By using this model, you acknowledge and accept that the generated content may sometimes exhibit biases or produce content that is offensive or inappropriate. The developers of this repository do not endorse, support, or promote any such content or viewpoints.
+ - Limitations: The large language model is an AI-based tool and not a human. It may produce incorrect, nonsensical, or irrelevant responses. It is the user's responsibility to critically evaluate the generated content and use it at their discretion.
+ - Use at Your Own Risk: Users of this large language model must assume full responsibility for any consequences that may arise from their use of the tool. The developers and contributors of this repository shall not be held liable for any damages, losses, or harm resulting from the use or misuse of the provided model.
+ - Ethical Considerations: Users are encouraged to use the large language model responsibly and ethically. By using this model, you agree not to use it for purposes that promote hate speech, discrimination, harassment, or any form of illegal or harmful activities.
+ - Reporting Issues: If you encounter any biased, offensive, or otherwise inappropriate content generated by the large language model, please report it to the repository maintainers through the provided channels. Your feedback will help improve the model and mitigate potential issues.
+ - Changes to this Disclaimer: The developers of this repository reserve the right to modify or update this disclaimer at any time without prior notice. It is the user's responsibility to periodically review the disclaimer to stay informed about any changes.
+
+ By using the large language model provided in this repository, you agree to accept and comply with the terms and conditions outlined in this disclaimer. If you do not agree with any part of this disclaimer, you should refrain from using the model and any content generated by it.
+
+ ## Star History
+
+ [![Star History Chart](https://api.star-history.com/svg?repos=h2oai/h2ogpt&type=Timeline)](https://star-history.com/#h2oai/h2ogpt&Timeline)
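The Python client API mentioned in this README lives in client/h2ogpt_client in this commit, but its call signatures are not visible in this view. As a hedged minimal sketch, the pinned `gradio_client==0.6.1` can already talk to a locally running server; `view_api()` is used here precisely because the h2oGPT endpoint names are not confirmed by this diff:

```python
# Sketch: connect the pinned gradio_client to a local h2oGPT Gradio server
# (started via generate.py on the default port 7860) and list its endpoints.
# Endpoint names and signatures vary by h2oGPT version, hence view_api().
from gradio_client import Client

client = Client("http://localhost:7860")
client.view_api()  # prints the named endpoints and their parameters
```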
auth.json.lock ADDED
File without changes
benchmarks/llm_gpu_benchmark.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+
+
+ # %%
+ import json
+
+ import pandas as pd
+ import plotly.express as px
+ import plotly.graph_objects as go
+ import plotly.io as pio
+ from plotly.subplots import make_subplots
+
+ # %%
+ # Read the json file
+ # This script processes the llm_gpu_benchmarks.json file in the tmp/inputs folder
+ # The file is generated using the command
+ # curl -sSL https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/perf.json | jq -s '.' > llm_gpu_benchmarks.json
+ with open('llm_gpu_benchmarks.json') as f:
+     data = json.load(f)
+ del f
+
+ # %%
+ # Load the parsed records into a dataframe
+ df = pd.json_normalize(data)
+ del data
+
+ # %%
+ # Process the dataframe
+ # Drop columns that are not needed
+ df.drop(columns=['task', 'ngpus', 'reps', 'date', 'git_sha', 'transformers', 'bitsandbytes', 'cuda', 'hostname',
+                  'summarize_input_len_bytes'], inplace=True)
+ # Rename columns
+ df.rename(columns={'n_gpus': 'gpu_count'}, inplace=True)
+ # Split the gpus column into gpu_name and gpu_memory_gb
+ df["gpu_name"] = df.gpus.str.extract(r'[1-9] x ([\w\- ]+) .+')
+ df["gpu_memory_gb"] = round(
+     pd.to_numeric(df.gpus.str.extract(r'[\w ]+ \(([\d]+) .+', expand=False), errors='coerce') / 1024)
+ df["gpu_memory_gb"] = df["gpu_memory_gb"].astype('Int64')
+ df.drop(columns=['gpus'], inplace=True)
+ # Normalize gpu_name values
+ df.gpu_name = df.gpu_name.str.replace('NVIDIA ', '')
+ df.gpu_name = df.gpu_name.str.replace('GeForce ', '')
+ df.gpu_name = df.gpu_name.str.replace('A100-SXM4-80GB', 'A100 SXM4')
+ df.gpu_name = df.gpu_memory_gb.astype(str) + "-" + df.gpu_name
+ # Remove CPU-only rows (no GPU name could be extracted)
+ df.drop(df[df.gpu_name.isnull()].index, inplace=True)
+
+ # %%
+ # Remove duplicate rows
+ df.drop_duplicates(['backend', 'base_model', 'bits', 'gpu_count', 'gpu_name'], inplace=True)
+
+ # %% Add baseline comparison columns
+ # Looking at the CPU data for the 4, 8, and 16 bit quantization runs of the benchmark, we simplify them to a single
+ # value per task
+ cpu_summary_out_throughput = 1353 / 1216  # bytes/second (summarize_output_len_bytes / summarize_time)
+ cpu_generate_out_throughput = 849 / 180  # bytes/second (generate_output_len_bytes / generate_time)
+
+ # add GPU throughput columns
+ df["summary_out_throughput"] = df.summarize_output_len_bytes / df.summarize_time
+ df["generate_out_throughput"] = df.generate_output_len_bytes / df.generate_time
+ # add GPU throughput boost columns (relative to the CPU baseline)
+ df["summary_out_throughput_normalize"] = df.summary_out_throughput / cpu_summary_out_throughput
+ df["generate_out_throughput_normalize"] = df.generate_out_throughput / cpu_generate_out_throughput
+
+ # %%
+ # df.to_excel('tmp/scratchpad/output/llm_gpu_benchmarks.xlsx', index=False)
+
+ # %%
+ pio.renderers.default = "browser"
+
+ # %%
+ bits_bar_colors = {'4': px.colors.qualitative.D3[0],
+                    '8': px.colors.qualitative.D3[1],
+                    '16': px.colors.qualitative.D3[2]}
+
+ backends = list(df.backend.unique())
+ base_models = list(df.base_model.unique())
+ n_gpus = list(df.gpu_count.unique())
+
+ # %%
+ for backend in backends:
+     # for backend in ['transformers']:
+     fig_bar = make_subplots(rows=len(n_gpus),
+                             cols=len(base_models) * 2,
+                             shared_xaxes='all',
+                             shared_yaxes='columns',
+                             start_cell="top-left",
+                             vertical_spacing=0.1,
+                             print_grid=False,
+                             row_titles=[f'{gpu_count} GPUs' for gpu_count in n_gpus],
+                             column_titles=['llama2-7b-chat Summarization', 'llama2-7b-chat Generation',
+                                            'llama2-13b-chat Summarization', 'llama2-13b-chat Generation',
+                                            'llama2-70b-chat Summarization', 'llama2-70b-chat Generation'])
+
+     # for base_model in ['h2oai/h2ogpt-4096-llama2-7b-chat']:
+     for base_model in base_models:
+         for gpu_count in n_gpus:
+             for bits in sorted(df.bits.unique()):
+                 sub_df = df[(df.backend == backend) &
+                             (df.base_model == base_model) &
+                             (df.gpu_count == gpu_count) &
+                             (df.bits == bits)].sort_values(by='gpu_name')
+                 fig_bar.add_trace(go.Bar(x=sub_df.summary_out_throughput_normalize,
+                                          y=sub_df.gpu_name,
+                                          name=f'sum-{bits} bits',
+                                          legendgroup=f'sum-{bits} bits',
+                                          marker=dict(color=bits_bar_colors[f'{bits}']),
+                                          orientation='h'),
+                                   row=n_gpus.index(gpu_count) + 1,
+                                   col=base_models.index(base_model) * 2 + 1)
+                 fig_bar.add_trace(go.Bar(x=sub_df.generate_out_throughput_normalize,
+                                          y=sub_df.gpu_name,
+                                          name=f'gen-{bits} bits',
+                                          legendgroup=f'gen-{bits} bits',
+                                          marker=dict(color=bits_bar_colors[f'{bits}']),
+                                          orientation='h'),
+                                   row=n_gpus.index(gpu_count) + 1,
+                                   col=base_models.index(base_model) * 2 + 2)
+
+     fig_bar.update_layout(plot_bgcolor='rgb(250,250,250)',
+                           showlegend=True,
+                           barmode="group")
+     # fig_bar.show()
+     fig_bar.write_html(f'llm_gpu_benchmark_{backend}.html', include_plotlyjs='cdn')
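
Note: the following is a minimal sketch (not part of the uploaded file) of the normalization the script above performs, useful for sanity-checking a single perf.json record by hand. The field names and CPU baselines come from the script itself; the record values below are hypothetical, not measured results.

```python
# Minimal sketch: normalize one perf.json record against the CPU baselines
# used in llm_gpu_benchmark.py. All record values below are hypothetical.
record = {
    "summarize_output_len_bytes": 1400,  # hypothetical
    "summarize_time": 50.0,              # seconds, hypothetical
    "generate_output_len_bytes": 900,    # hypothetical
    "generate_time": 9.0,                # seconds, hypothetical
}

CPU_SUM_THROUGHPUT = 1353 / 1216  # bytes/second, CPU baseline from the script
CPU_GEN_THROUGHPUT = 849 / 180    # bytes/second, CPU baseline from the script

# Throughput boost = GPU bytes/second divided by the CPU baseline
sum_boost = (record["summarize_output_len_bytes"] / record["summarize_time"]) / CPU_SUM_THROUGHPUT
gen_boost = (record["generate_output_len_bytes"] / record["generate_time"]) / CPU_GEN_THROUGHPUT
print(f"summarization: {sum_boost:.1f}x CPU baseline; generation: {gen_boost:.1f}x CPU baseline")
```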
benchmarks/llm_gpu_benchmark_text-generation-inference.html ADDED
@@ -0,0 +1,7 @@
+ <html>
+ <head><meta charset="utf-8" /></head>
+ <body>
+ <div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
+ <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script> <div id="8d98303e-9d8d-4a86-9ab9-85be1f565ba7" class="plotly-graph-div" style="height:100%; width:100%;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("8d98303e-9d8d-4a86-9ab9-85be1f565ba7")) { Plotly.newPlot( "8d98303e-9d8d-4a86-9ab9-85be1f565ba7", [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x","y":[],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x2","y":[],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[31.964670378460696,40.07702972093452,28.212217062134258,24.76324507950772,29.383143217889106],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[38.97113273835895,37.81293817302825,25.418311714688866,46.82453047975238,25.870047557539163],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x7","y":[],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x8","y":[],"yaxis":"y8"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.742149283479364,131.11372927692716,27.756812705358207],"xaxis":"x7","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.757641294033732,60.88036130542081,24.89894321470165],"xaxis":"x8","y":["45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x13","y":[],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x14","y":[],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.58192050074467,27.706125039541696],"xaxis":"x13","y":["45-RTX A6000","80-A100 
SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[24.92264927072723,24.11901127583454],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x19","y":[],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x20","y":[],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[26.56845022740626],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.63055816163121],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x3","y":[],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x4","y":[],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,38.784585018023556,18.13337657657005],"xaxis":"x3","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,28.590730184060984,16.18347618092991],"xaxis":"x4","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x9","y":[],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x10","y":[],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[20.929693801547206,12.694114023867758,85.02391911717123,17.23203722663425],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[26.649908731325855,18.11013971401145,49.03779902422664,18.7070327239283],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 
SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x15","y":[],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x16","y":[],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[12.361580993407348,16.12018834278174],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.620036315851138,17.885323649884445],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x21","y":[],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x22","y":[],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.333509386436194],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[17.907476788430102],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x5","y":[],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x6","y":[],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 
bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x11","y":[],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x12","y":[],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x17","y":[],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x18","y":[],"yaxis":"y18"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,6.337898874140187],"xaxis":"x17","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,8.157040216950774],"xaxis":"x18","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[6.239297143818297],"xaxis":"x23","y":["80-A100 SXM4"],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[8.082069511295837],"xaxis":"x24","y":["80-A100 SXM4"],"yaxis":"y24"}], {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 
GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinew
idth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white"
,"ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"anchor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches
":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}}, {"responsive": true} ) }; </script> </div>
+ </body>
+ </html>
benchmarks/llm_gpu_benchmark_transformers.html ADDED
@@ -0,0 +1,7 @@
+ <html>
+ <head><meta charset="utf-8" /></head>
+ <body>
+ <div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
+ <script src="https://cdn.plot.ly/plotly-2.2.0.min.js"></script> <div id="4671500e-e030-484c-8d8f-02c9ef28c439" class="plotly-graph-div" style="height:100%; width:100%;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("4671500e-e030-484c-8d8f-02c9ef28c439")) { Plotly.newPlot( "4671500e-e030-484c-8d8f-02c9ef28c439", [{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.839381644193974,19.682153353799034,14.47651674912018,26.790154000919145,16.85058557689085],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.67469844085007,27.622051912134882,19.374373797474846,27.42684895928983,20.2526752952322],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.417365201244467,11.290925144038532,6.08976919051411,9.56217317275004,5.9263976593415855],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.695887145541112,7.812688672567852,5.614002693550519,7.59461596844275,6.252509885345299],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[18.73507454097704,39.43429532784967,27.07453064626594,39.96998450085984,29.3453161508673],"xaxis":"x","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.313436327725622,34.706856549443415,25.316661797353536,35.57028809081909,26.27458999671037],"xaxis":"x2","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y2"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[9.423935993931764,13.777794033942168,26.52473854898931,15.828182317775882],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.395401201017949,18.633481353508632,27.185836623669307,19.299187279602062],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[4.376286144153169,5.93295870509821,9.48124590639799,5.974715789431367],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.689148081304866,5.460311898298637,7.664435463393246,6.406802687346095],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-16 
bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[17.81624239176298,26.86157274268731,39.624799784757535,27.909081799152222],"xaxis":"x7","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y7"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.05454520400735,24.464037234597612,34.25052506253877,25.495156728837525],"xaxis":"x8","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y8"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[13.394795492541103,15.210707499507597],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.15606381072783,18.661753478727857],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.899421336969099,5.767145178389089],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[5.482425931352881,6.192523296540574],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[25.9430839554289,27.46244144955532],"xaxis":"x13","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y13"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[23.520372312313448,25.20924356998125],"xaxis":"x14","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y14"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[14.764927656045513],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[18.07719847124392],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[5.718961706449293],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[6.177879854004683],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[27.054106396318144],"xaxis":"x19","y":["80-A100 SXM4"],"yaxis":"y19"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[25.138719102309768],"xaxis":"x20","y":["80-A100 SXM4"],"yaxis":"y20"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[8.083390907285379,12.174340676118161,11.076606608131389,16.98095523506584,12.1008725506651],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.287678373962581,13.674114390829141,13.308822531004934,17.365713991091738,12.794482361704157],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX 
A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1403647823510736,4.962801741500335,3.5348819482865093,5.3562909858984185,4.1213135763128905],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.18815245154689,5.105728547922034,4.718240806380357,6.509024089959697,4.827719089783637],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,11.435609485285738,17.906931325335666,18.878279411581737],"xaxis":"x3","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y3"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,15.63493452970772,22.260343102292754,21.142120495293863],"xaxis":"x4","y":["24-RTX 3090","24-RTX 4090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y4"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[7.931395602652238,10.626258179366356,16.319110879759947,11.241866660596408],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[8.215795602873966,12.833338647314658,16.943446615015436,12.165800832662722],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.1362935762237645,3.499943275803895,5.402452917863267,3.9771491776646073],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[3.156100924190738,4.674808411970743,6.638529207897594,4.611620121814299],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[9.178059918412773,11.287265701494618,18.203631997182082,18.325614335569053],"xaxis":"x9","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y9"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[10.880162241524287,15.437944210820223,22.20571335065674,21.096027375985646],"xaxis":"x10","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y10"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.252856056970655,11.390035634842294],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[12.44766998737035,12.445574043628245],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-8 
bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.432161213004653,4.005435712274412],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.577172738204334,4.623478053690466],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[11.047490604822276,16.97583795634349],"xaxis":"x15","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y15"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[15.140929085583872,18.40904684710705],"xaxis":"x16","y":["45-RTX A6000","80-A100 SXM4"],"yaxis":"y16"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[10.82198892665345],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[11.846523539191672],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[3.8795801184687786],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[4.568029810459134],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[16.97013525520682],"xaxis":"x21","y":["80-A100 SXM4"],"yaxis":"y21"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[18.913362098572737],"xaxis":"x22","y":["80-A100 SXM4"],"yaxis":"y22"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,5.161890396610965,6.976123395155549],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.887611768925055,9.031399021823733],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y5"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y6"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x5","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y5"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null,null,null,null],"xaxis":"x6","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation","80-A100 SXM4"],"yaxis":"y6"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[null,4.96092701086689,7.068376492905629],"xaxis":"x11","y":["24-RTX 3090","45-RTX 
A6000","48-RTX 6000 Ada Generation"],"yaxis":"y11"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[null,5.693355665703394,8.905280446876153],"xaxis":"x12","y":["24-RTX 3090","45-RTX A6000","48-RTX 6000 Ada Generation"],"yaxis":"y12"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9856691832414866],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.913951722547195],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x11","y":["45-RTX A6000"],"yaxis":"y11"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[null],"xaxis":"x12","y":["45-RTX A6000"],"yaxis":"y12"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[4.8550061015042685],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[5.58004075989967],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[1.9670200139619358],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[1.8873606277914459],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[2.665381007576966],"xaxis":"x17","y":["45-RTX A6000"],"yaxis":"y17"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[3.597816249219273],"xaxis":"x18","y":["45-RTX A6000"],"yaxis":"y18"},{"legendgroup":"sum-4 bits","marker":{"color":"#1F77B4"},"name":"sum-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-4 bits","marker":{"color":"#1F77B4"},"name":"gen-4 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-8 bits","marker":{"color":"#FF7F0E"},"name":"sum-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-8 bits","marker":{"color":"#FF7F0E"},"name":"gen-8 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"},{"legendgroup":"sum-16 bits","marker":{"color":"#2CA02C"},"name":"sum-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x23","y":[],"yaxis":"y23"},{"legendgroup":"gen-16 bits","marker":{"color":"#2CA02C"},"name":"gen-16 bits","orientation":"h","type":"bar","x":[],"xaxis":"x24","y":[],"yaxis":"y24"}], {"annotations":[{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Summarization","x":0.06777777777777778,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-7b-chat Generation","x":0.2366666666666667,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat 
Summarization","x":0.40555555555555556,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-13b-chat Generation","x":0.5744444444444445,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Summarization","x":0.7433333333333334,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"llama2-70b-chat Generation","x":0.9122222222222223,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"1 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.9125,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"2 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.6375000000000001,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"4 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.36250000000000004,"yanchor":"middle","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"8 GPUs","textangle":90,"x":0.98,"xanchor":"left","xref":"paper","y":0.0875,"yanchor":"middle","yref":"paper"}],"barmode":"group","plot_bgcolor":"rgb(250,250,250)","showlegend":true,"template":{"data":{"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"baxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"choropleth":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"choropleth"}],"contour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"contour"}],"contourcarpet":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"contourcarpet"}],"heatmap":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmap"}],"heatmapgl":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"heatmapgl"}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"histogram2d":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.44444
44444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2d"}],"histogram2dcontour":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"histogram2dcontour"}],"mesh3d":[{"colorbar":{"outlinewidth":0,"ticks":""},"type":"mesh3d"}],"parcoords":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"parcoords"}],"pie":[{"automargin":true,"type":"pie"}],"scatter":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter"}],"scatter3d":[{"line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatter3d"}],"scattercarpet":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattercarpet"}],"scattergeo":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergeo"}],"scattergl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattergl"}],"scattermapbox":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scattermapbox"}],"scatterpolar":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolar"}],"scatterpolargl":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterpolargl"}],"scatterternary":[{"marker":{"colorbar":{"outlinewidth":0,"ticks":""}},"type":"scatterternary"}],"surface":[{"colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"type":"surface"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}]},"layout":{"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"autotypenumbers":"strict","coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]],"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]},"colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"geo":{"bgcolor":"white","lakecolor":"white","landcolor":"#E5ECF6","showlakes":true,"showland":true,"subunitcolor":"white"},"hoverlabel":{"align":"left"},"hovermode":"closest","mapbox":{"style":"light"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"angularaxis":{"gridcolor"
:"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","gridwidth":2,"linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white"}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"ternary":{"aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"bgcolor":"#E5ECF6","caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"title":{"x":0.05},"xaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2},"yaxis":{"automargin":true,"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","zerolinewidth":2}}},"xaxis":{"anchor":"y","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis10":{"anchor":"y10","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis11":{"anchor":"y11","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis12":{"anchor":"y12","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis13":{"anchor":"y13","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis14":{"anchor":"y14","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis15":{"anchor":"y15","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis16":{"anchor":"y16","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis17":{"anchor":"y17","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis18":{"anchor":"y18","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis19":{"anchor":"y19","domain":[0.0,0.13555555555555557]},"xaxis2":{"anchor":"y2","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis20":{"anchor":"y20","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19"},"xaxis21":{"anchor":"y21","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19"},"xaxis22":{"anchor":"y22","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19"},"xaxis23":{"anchor":"y23","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19"},"xaxis24":{"anchor":"y24","domain":[0.8444444444444444,0.98],"matches":"x19"},"xaxis3":{"anchor":"y3","domain":[0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"xaxis4":{"anchor":"y4","domain":[0.5066666666666667,0.6422222222222222],"matches":"x19","showticklabels":false},"xaxis5":{"anchor":"y5","domain":[0.6755555555555556,0.8111111111111111],"matches":"x19","showticklabels":false},"xaxis6":{"anchor":"y6","domain":[0.8444444444444444,0.98],"matches":"x19","showticklabels":false},"xaxis7":{"anchor":"y7","domain":[0.0,0.13555555555555557],"matches":"x19","showticklabels":false},"xaxis8":{"anchor":"y8","domain":[0.1688888888888889,0.30444444444444446],"matches":"x19","showticklabels":false},"xaxis9":{"anchor":"y9","domain":[
0.3377777777777778,0.4733333333333334],"matches":"x19","showticklabels":false},"yaxis":{"anchor":"x","domain":[0.825,1.0],"matches":"y19"},"yaxis10":{"anchor":"x10","domain":[0.55,0.7250000000000001],"matches":"y22"},"yaxis11":{"anchor":"x11","domain":[0.55,0.7250000000000001],"matches":"y23"},"yaxis12":{"anchor":"x12","domain":[0.55,0.7250000000000001],"matches":"y24"},"yaxis13":{"anchor":"x13","domain":[0.275,0.45],"matches":"y19"},"yaxis14":{"anchor":"x14","domain":[0.275,0.45],"matches":"y20"},"yaxis15":{"anchor":"x15","domain":[0.275,0.45],"matches":"y21"},"yaxis16":{"anchor":"x16","domain":[0.275,0.45],"matches":"y22"},"yaxis17":{"anchor":"x17","domain":[0.275,0.45],"matches":"y23"},"yaxis18":{"anchor":"x18","domain":[0.275,0.45],"matches":"y24"},"yaxis19":{"anchor":"x19","domain":[0.0,0.175]},"yaxis2":{"anchor":"x2","domain":[0.825,1.0],"matches":"y20"},"yaxis20":{"anchor":"x20","domain":[0.0,0.175]},"yaxis21":{"anchor":"x21","domain":[0.0,0.175]},"yaxis22":{"anchor":"x22","domain":[0.0,0.175]},"yaxis23":{"anchor":"x23","domain":[0.0,0.175]},"yaxis24":{"anchor":"x24","domain":[0.0,0.175]},"yaxis3":{"anchor":"x3","domain":[0.825,1.0],"matches":"y21"},"yaxis4":{"anchor":"x4","domain":[0.825,1.0],"matches":"y22"},"yaxis5":{"anchor":"x5","domain":[0.825,1.0],"matches":"y23"},"yaxis6":{"anchor":"x6","domain":[0.825,1.0],"matches":"y24"},"yaxis7":{"anchor":"x7","domain":[0.55,0.7250000000000001],"matches":"y19"},"yaxis8":{"anchor":"x8","domain":[0.55,0.7250000000000001],"matches":"y20"},"yaxis9":{"anchor":"x9","domain":[0.55,0.7250000000000001],"matches":"y21"}}, {"responsive": true} ) }; </script> </div>
6
+ </body>
7
+ </html>
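
Note: the records added below in benchmarks/llm_gpu_benchmarks.json share a flat schema (backend, base_model, bits, n_gpus, summarize/generate times and output byte counts, or an "exception" field for runs that hit OOM). A minimal sketch of how one might load and rank the runs, assuming only the standard library and that the file is a single JSON array as the diff suggests:

import json

# Load the benchmark records (one JSON array of flat dicts).
with open("benchmarks/llm_gpu_benchmarks.json") as f:
    runs = json.load(f)

# Keep only completed runs; OOM runs carry an "exception" field instead of timings.
ok = [r for r in runs if "exception" not in r]

# Rank by generation throughput (output bytes per second) and print the top five.
top = sorted(ok, key=lambda r: r["generate_output_len_bytes"] / r["generate_time"], reverse=True)
for r in top[:5]:
    bps = r["generate_output_len_bytes"] / r["generate_time"]
    print(f'{r["base_model"]:45s} {r["backend"]:28s} {r["bits"]:2d}-bit '
          f'{r["n_gpus"]} GPU(s)  {bps:7.1f} B/s')

Bytes per second keeps the comparison model-agnostic, since the records store output lengths in bytes rather than tokens.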
benchmarks/llm_gpu_benchmarks.json ADDED
@@ -0,0 +1,2790 @@
1
+ [
2
+ {
3
+ "backend": "transformers",
4
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
5
+ "task": "summary_and_generate",
6
+ "bits": 16,
7
+ "ngpus": 1,
8
+ "reps": 3,
9
+ "date": "08/18/2023 10:46:19",
10
+ "git_sha": "55d3b55b",
11
+ "n_gpus": 1,
12
+ "transformers": "4.30.2",
13
+ "bitsandbytes": "0.41.1",
14
+ "cuda": "11.7",
15
+ "hostname": "rippa",
16
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
17
+ "summarize_input_len_bytes": 857252,
18
+ "summarize_output_len_bytes": 1417,
19
+ "summarize_time": 32.29472152392069,
20
+ "generate_output_len_bytes": 2384,
21
+ "generate_time": 14.563165505727133
22
+ },
23
+ {
24
+ "backend": "transformers",
25
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
26
+ "task": "summary_and_generate",
27
+ "bits": 16,
28
+ "ngpus": 1,
29
+ "reps": 3,
30
+ "date": "08/18/2023 10:48:55",
31
+ "git_sha": "55d3b55b",
32
+ "n_gpus": 1,
33
+ "transformers": "4.30.2",
34
+ "bitsandbytes": "0.41.1",
35
+ "cuda": "11.7",
36
+ "hostname": "timemachine",
37
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
38
+ "summarize_input_len_bytes": 857252,
39
+ "summarize_output_len_bytes": 1417,
40
+ "summarize_time": 67.97515447934468,
41
+ "generate_output_len_bytes": 2384,
42
+ "generate_time": 33.00641902287801
43
+ },
44
+ {
45
+ "backend": "transformers",
46
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
47
+ "task": "summary_and_generate",
48
+ "bits": 8,
49
+ "ngpus": 1,
50
+ "reps": 3,
51
+ "date": "08/18/2023 10:48:58",
52
+ "git_sha": "55d3b55b",
53
+ "n_gpus": 1,
54
+ "transformers": "4.30.2",
55
+ "bitsandbytes": "0.41.1",
56
+ "cuda": "11.7",
57
+ "hostname": "rippa",
58
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
59
+ "summarize_input_len_bytes": 857252,
60
+ "summarize_output_len_bytes": 1440,
61
+ "summarize_time": 114.62220064798991,
62
+ "generate_output_len_bytes": 2619,
63
+ "generate_time": 71.0722058614095
64
+ },
65
+ {
66
+ "backend": "transformers",
67
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
68
+ "task": "summary_and_generate",
69
+ "bits": 4,
70
+ "ngpus": 1,
71
+ "reps": 3,
72
+ "date": "08/18/2023 10:58:34",
73
+ "git_sha": "55d3b55b",
74
+ "n_gpus": 1,
75
+ "transformers": "4.30.2",
76
+ "bitsandbytes": "0.41.1",
77
+ "cuda": "11.7",
78
+ "hostname": "rippa",
79
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
80
+ "summarize_input_len_bytes": 857252,
81
+ "summarize_output_len_bytes": 866,
82
+ "summarize_time": 39.54404203097025,
83
+ "generate_output_len_bytes": 2927,
84
+ "generate_time": 22.466302394866943
85
+ },
86
+ {
87
+ "backend": "transformers",
88
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
89
+ "task": "summary_and_generate",
90
+ "bits": 16,
91
+ "ngpus": 2,
92
+ "reps": 3,
93
+ "date": "08/18/2023 11:01:59",
94
+ "git_sha": "55d3b55b",
95
+ "n_gpus": 2,
96
+ "transformers": "4.30.2",
97
+ "bitsandbytes": "0.41.1",
98
+ "cuda": "11.7",
99
+ "hostname": "rippa",
100
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
101
+ "summarize_input_len_bytes": 857252,
102
+ "summarize_output_len_bytes": 1417,
103
+ "summarize_time": 32.1394579410553,
104
+ "generate_output_len_bytes": 2384,
105
+ "generate_time": 14.757195552190145
106
+ },
107
+ {
108
+ "backend": "transformers",
109
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
110
+ "task": "summary_and_generate",
111
+ "bits": 8,
112
+ "ngpus": 1,
113
+ "reps": 3,
114
+ "date": "08/18/2023 10:54:29",
115
+ "git_sha": "55d3b55b",
116
+ "n_gpus": 1,
117
+ "transformers": "4.30.2",
118
+ "bitsandbytes": "0.41.1",
119
+ "cuda": "11.7",
120
+ "hostname": "timemachine",
121
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
122
+ "summarize_input_len_bytes": 857252,
123
+ "summarize_output_len_bytes": 910,
124
+ "summarize_time": 185.14580019315085,
125
+ "generate_output_len_bytes": 2042,
126
+ "generate_time": 117.13909141222636
127
+ },
128
+ {
129
+ "backend": "transformers",
130
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
131
+ "task": "summary_and_generate",
132
+ "bits": 8,
133
+ "ngpus": 2,
134
+ "reps": 3,
135
+ "date": "08/18/2023 11:04:37",
136
+ "git_sha": "55d3b55b",
137
+ "n_gpus": 2,
138
+ "transformers": "4.30.2",
139
+ "bitsandbytes": "0.41.1",
140
+ "cuda": "11.7",
141
+ "hostname": "rippa",
142
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
143
+ "summarize_input_len_bytes": 857252,
144
+ "summarize_output_len_bytes": 1002,
145
+ "summarize_time": 94.98129558563232,
146
+ "generate_output_len_bytes": 2512,
147
+ "generate_time": 69.4871145884196
148
+ },
149
+ {
150
+ "backend": "transformers",
151
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
152
+ "task": "summary_and_generate",
153
+ "bits": 4,
154
+ "ngpus": 2,
155
+ "reps": 3,
156
+ "date": "08/18/2023 11:13:08",
157
+ "git_sha": "55d3b55b",
158
+ "n_gpus": 2,
159
+ "transformers": "4.30.2",
160
+ "bitsandbytes": "0.41.1",
161
+ "cuda": "11.7",
162
+ "hostname": "rippa",
163
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
164
+ "summarize_input_len_bytes": 857252,
165
+ "summarize_output_len_bytes": 1276,
166
+ "summarize_time": 43.23498781522115,
167
+ "generate_output_len_bytes": 2927,
168
+ "generate_time": 22.826789538065594
169
+ },
170
+ {
171
+ "backend": "transformers",
172
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
173
+ "task": "summary_and_generate",
174
+ "bits": 4,
175
+ "ngpus": 1,
176
+ "reps": 3,
177
+ "date": "08/18/2023 11:10:08",
178
+ "git_sha": "55d3b55b",
179
+ "n_gpus": 1,
180
+ "transformers": "4.30.2",
181
+ "bitsandbytes": "0.41.1",
182
+ "cuda": "11.7",
183
+ "hostname": "timemachine",
184
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
185
+ "summarize_input_len_bytes": 857252,
186
+ "summarize_output_len_bytes": 991,
187
+ "summarize_time": 90.51939169565837,
188
+ "generate_output_len_bytes": 2927,
189
+ "generate_time": 48.96095744768778
190
+ },
191
+ {
192
+ "backend": "transformers",
193
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
194
+ "task": "summary_and_generate",
195
+ "bits": 16,
196
+ "ngpus": 1,
197
+ "reps": 3,
198
+ "date": "08/18/2023 11:16:48",
199
+ "git_sha": "55d3b55b",
200
+ "n_gpus": 1,
201
+ "transformers": "4.30.2",
202
+ "bitsandbytes": "0.41.1",
203
+ "cuda": "11.7",
204
+ "hostname": "rippa",
205
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
206
+ "summarize_input_len_bytes": 857252,
207
+ "summarize_output_len_bytes": 1417,
208
+ "summarize_time": 31.86189842224121,
209
+ "generate_output_len_bytes": 2384,
210
+ "generate_time": 14.209659894307455
211
+ },
212
+ {
213
+ "backend": "transformers",
214
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
215
+ "task": "summary_and_generate",
216
+ "bits": 16,
217
+ "ngpus": 2,
218
+ "reps": 3,
219
+ "date": "08/18/2023 11:17:39",
220
+ "git_sha": "55d3b55b",
221
+ "n_gpus": 2,
222
+ "transformers": "4.30.2",
223
+ "bitsandbytes": "0.41.1",
224
+ "cuda": "11.7",
225
+ "hostname": "timemachine",
226
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
227
+ "summarize_input_len_bytes": 857252,
228
+ "summarize_output_len_bytes": 1417,
229
+ "summarize_time": 71.48081835110982,
230
+ "generate_output_len_bytes": 2384,
231
+ "generate_time": 33.5740262667338
232
+ },
233
+ {
234
+ "backend": "transformers",
235
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
236
+ "task": "summary_and_generate",
237
+ "bits": 8,
238
+ "ngpus": 1,
239
+ "reps": 3,
240
+ "date": "08/18/2023 11:19:24",
241
+ "git_sha": "55d3b55b",
242
+ "n_gpus": 1,
243
+ "transformers": "4.30.2",
244
+ "bitsandbytes": "0.41.1",
245
+ "cuda": "11.7",
246
+ "hostname": "rippa",
247
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
248
+ "summarize_input_len_bytes": 857252,
249
+ "summarize_output_len_bytes": 1002,
250
+ "summarize_time": 94.17744310696919,
251
+ "generate_output_len_bytes": 2512,
252
+ "generate_time": 70.12592967351277
253
+ },
254
+ {
255
+ "backend": "transformers",
256
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
257
+ "task": "summary_and_generate",
258
+ "bits": 4,
259
+ "ngpus": 1,
260
+ "reps": 3,
261
+ "date": "08/18/2023 11:27:57",
262
+ "git_sha": "55d3b55b",
263
+ "n_gpus": 1,
264
+ "transformers": "4.30.2",
265
+ "bitsandbytes": "0.41.1",
266
+ "cuda": "11.7",
267
+ "hostname": "rippa",
268
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
269
+ "summarize_input_len_bytes": 857252,
270
+ "summarize_output_len_bytes": 1276,
271
+ "summarize_time": 42.8066500822703,
272
+ "generate_output_len_bytes": 2927,
273
+ "generate_time": 22.626200040181477
274
+ },
275
+ {
276
+ "backend": "transformers",
277
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
278
+ "task": "summary_and_generate",
279
+ "bits": 8,
280
+ "ngpus": 2,
281
+ "reps": 3,
282
+ "date": "08/18/2023 11:23:22",
283
+ "git_sha": "55d3b55b",
284
+ "n_gpus": 2,
285
+ "transformers": "4.30.2",
286
+ "bitsandbytes": "0.41.1",
287
+ "cuda": "11.7",
288
+ "hostname": "timemachine",
289
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
290
+ "summarize_input_len_bytes": 857252,
291
+ "summarize_output_len_bytes": 910,
292
+ "summarize_time": 186.88371555010477,
293
+ "generate_output_len_bytes": 2042,
294
+ "generate_time": 117.3530724843343
295
+ },
296
+ {
297
+ "backend": "transformers",
298
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
299
+ "task": "summary_and_generate",
300
+ "bits": 4,
301
+ "ngpus": 2,
302
+ "reps": 3,
303
+ "date": "08/18/2023 11:39:03",
304
+ "git_sha": "55d3b55b",
305
+ "n_gpus": 2,
306
+ "transformers": "4.30.2",
307
+ "bitsandbytes": "0.41.1",
308
+ "cuda": "11.7",
309
+ "hostname": "timemachine",
310
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
311
+ "summarize_input_len_bytes": 857252,
312
+ "summarize_output_len_bytes": 991,
313
+ "summarize_time": 94.50985678037007,
314
+ "generate_output_len_bytes": 2927,
315
+ "generate_time": 50.06416177749634
316
+ },
317
+ {
318
+ "backend": "transformers",
319
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
320
+ "task": "summary_and_generate",
321
+ "bits": 16,
322
+ "ngpus": 1,
323
+ "reps": 3,
324
+ "date": "08/18/2023 21:08:31",
325
+ "git_sha": "fc4826f2",
326
+ "n_gpus": 1,
327
+ "transformers": "4.30.2",
328
+ "bitsandbytes": "0.41.1",
329
+ "cuda": "11.8",
330
+ "hostname": "cloudvm",
331
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
332
+ "summarize_input_len_bytes": 857252,
333
+ "summarize_output_len_bytes": 1267,
334
+ "summarize_time": 38.80374129613241,
335
+ "generate_output_len_bytes": 2384,
336
+ "generate_time": 19.23690136273702
337
+ },
338
+ {
339
+ "backend": "transformers",
340
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
341
+ "task": "summary_and_generate",
342
+ "bits": 8,
343
+ "ngpus": 1,
344
+ "reps": 3,
345
+ "date": "08/18/2023 21:11:49",
346
+ "git_sha": "fc4826f2",
347
+ "n_gpus": 1,
348
+ "transformers": "4.30.2",
349
+ "bitsandbytes": "0.41.1",
350
+ "cuda": "11.8",
351
+ "hostname": "cloudvm",
352
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
353
+ "summarize_input_len_bytes": 857252,
354
+ "summarize_output_len_bytes": 1179,
355
+ "summarize_time": 178.79640992482504,
356
+ "generate_output_len_bytes": 2772,
357
+ "generate_time": 93.99476226170857
358
+ },
359
+ {
360
+ "backend": "transformers",
361
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
362
+ "task": "summary_and_generate",
363
+ "bits": 4,
364
+ "ngpus": 1,
365
+ "reps": 3,
366
+ "date": "08/18/2023 21:25:53",
367
+ "git_sha": "fc4826f2",
368
+ "n_gpus": 1,
369
+ "transformers": "4.30.2",
370
+ "bitsandbytes": "0.41.1",
371
+ "cuda": "11.8",
372
+ "hostname": "cloudvm",
373
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
374
+ "summarize_input_len_bytes": 857252,
375
+ "summarize_output_len_bytes": 1002,
376
+ "summarize_time": 53.44271365801493,
377
+ "generate_output_len_bytes": 2927,
378
+ "generate_time": 30.641155401865642
379
+ },
380
+ {
381
+ "backend": "transformers",
382
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
383
+ "task": "summary_and_generate",
384
+ "bits": 16,
385
+ "ngpus": 2,
386
+ "reps": 3,
387
+ "date": "08/18/2023 21:30:30",
388
+ "git_sha": "fc4826f2",
389
+ "n_gpus": 2,
390
+ "transformers": "4.30.2",
391
+ "bitsandbytes": "0.41.1",
392
+ "cuda": "11.8",
393
+ "hostname": "cloudvm",
394
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
395
+ "summarize_input_len_bytes": 857252,
396
+ "summarize_output_len_bytes": 1267,
397
+ "summarize_time": 40.80062770843506,
398
+ "generate_output_len_bytes": 2384,
399
+ "generate_time": 19.825008392333984
400
+ },
401
+ {
402
+ "backend": "transformers",
403
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
404
+ "task": "summary_and_generate",
405
+ "bits": 8,
406
+ "ngpus": 2,
407
+ "reps": 3,
408
+ "date": "08/18/2023 21:35:29",
409
+ "git_sha": "fc4826f2",
410
+ "n_gpus": 2,
411
+ "transformers": "4.30.2",
412
+ "bitsandbytes": "0.41.1",
413
+ "cuda": "11.8",
414
+ "hostname": "cloudvm",
415
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
416
+ "summarize_input_len_bytes": 857252,
417
+ "summarize_output_len_bytes": 1179,
418
+ "summarize_time": 177.35046529769897,
419
+ "generate_output_len_bytes": 2772,
420
+ "generate_time": 91.73111907641093
421
+ },
422
+ {
423
+ "backend": "transformers",
424
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
425
+ "task": "summary_and_generate",
426
+ "bits": 4,
427
+ "ngpus": 2,
428
+ "reps": 3,
429
+ "date": "08/18/2023 21:49:20",
430
+ "git_sha": "fc4826f2",
431
+ "n_gpus": 2,
432
+ "transformers": "4.30.2",
433
+ "bitsandbytes": "0.41.1",
434
+ "cuda": "11.8",
435
+ "hostname": "cloudvm",
436
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
437
+ "summarize_input_len_bytes": 857252,
438
+ "summarize_output_len_bytes": 1002,
439
+ "summarize_time": 56.894784371058144,
440
+ "generate_output_len_bytes": 2927,
441
+ "generate_time": 32.15500020980835
442
+ },
443
+ {
444
+ "backend": "transformers",
445
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
446
+ "task": "summary_and_generate",
447
+ "bits": 16,
448
+ "ngpus": 4,
449
+ "reps": 3,
450
+ "date": "08/18/2023 21:54:11",
451
+ "git_sha": "fc4826f2",
452
+ "n_gpus": 4,
453
+ "transformers": "4.30.2",
454
+ "bitsandbytes": "0.41.1",
455
+ "cuda": "11.8",
456
+ "hostname": "cloudvm",
457
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
458
+ "summarize_input_len_bytes": 857252,
459
+ "summarize_output_len_bytes": 1267,
460
+ "summarize_time": 41.46419604619344,
461
+ "generate_output_len_bytes": 2384,
462
+ "generate_time": 20.049855709075928
463
+ },
464
+ {
465
+ "backend": "transformers",
466
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
467
+ "task": "summary_and_generate",
468
+ "bits": 8,
469
+ "ngpus": 4,
470
+ "reps": 3,
471
+ "date": "08/18/2023 21:57:39",
472
+ "git_sha": "fc4826f2",
473
+ "n_gpus": 4,
474
+ "transformers": "4.30.2",
475
+ "bitsandbytes": "0.41.1",
476
+ "cuda": "11.8",
477
+ "hostname": "cloudvm",
478
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
479
+ "summarize_input_len_bytes": 857252,
480
+ "summarize_output_len_bytes": 1179,
481
+ "summarize_time": 183.73364853858948,
482
+ "generate_output_len_bytes": 2772,
483
+ "generate_time": 94.9052836894989
484
+ },
485
+ {
486
+ "backend": "transformers",
487
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
488
+ "task": "summary_and_generate",
489
+ "bits": 4,
490
+ "ngpus": 4,
491
+ "reps": 3,
492
+ "date": "08/18/2023 22:11:59",
493
+ "git_sha": "fc4826f2",
494
+ "n_gpus": 4,
495
+ "transformers": "4.30.2",
496
+ "bitsandbytes": "0.41.1",
497
+ "cuda": "11.8",
498
+ "hostname": "cloudvm",
499
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
500
+ "summarize_input_len_bytes": 857252,
501
+ "summarize_output_len_bytes": 1002,
502
+ "summarize_time": 59.204413731892906,
503
+ "generate_output_len_bytes": 2927,
504
+ "generate_time": 33.25332593917847
505
+ },
506
+ {
507
+ "backend": "transformers",
508
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
509
+ "task": "summary_and_generate",
510
+ "bits": 16,
511
+ "ngpus": 8,
512
+ "reps": 3,
513
+ "date": "08/18/2023 22:17:00",
514
+ "git_sha": "fc4826f2",
515
+ "n_gpus": 8,
516
+ "transformers": "4.30.2",
517
+ "bitsandbytes": "0.41.1",
518
+ "cuda": "11.8",
519
+ "hostname": "cloudvm",
520
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
521
+ "summarize_input_len_bytes": 857252,
522
+ "summarize_output_len_bytes": 1267,
523
+ "summarize_time": 42.09002653757731,
524
+ "generate_output_len_bytes": 2384,
525
+ "generate_time": 20.106103817621868
526
+ },
527
+ {
528
+ "backend": "transformers",
529
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
530
+ "task": "summary_and_generate",
531
+ "bits": 8,
532
+ "ngpus": 8,
533
+ "reps": 3,
534
+ "date": "08/18/2023 22:20:31",
535
+ "git_sha": "fc4826f2",
536
+ "n_gpus": 8,
537
+ "transformers": "4.30.2",
538
+ "bitsandbytes": "0.41.1",
539
+ "cuda": "11.8",
540
+ "hostname": "cloudvm",
541
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
542
+ "summarize_input_len_bytes": 857252,
543
+ "summarize_output_len_bytes": 1179,
544
+ "summarize_time": 185.28164370854697,
545
+ "generate_output_len_bytes": 2772,
546
+ "generate_time": 95.13023789723714
547
+ },
548
+ {
549
+ "backend": "transformers",
550
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
551
+ "task": "summary_and_generate",
552
+ "bits": 4,
553
+ "ngpus": 8,
554
+ "reps": 3,
555
+ "date": "08/18/2023 22:34:58",
556
+ "git_sha": "fc4826f2",
557
+ "n_gpus": 8,
558
+ "transformers": "4.30.2",
559
+ "bitsandbytes": "0.41.1",
560
+ "cuda": "11.8",
561
+ "hostname": "cloudvm",
562
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
563
+ "summarize_input_len_bytes": 857252,
564
+ "summarize_output_len_bytes": 1002,
565
+ "summarize_time": 60.9919019540151,
566
+ "generate_output_len_bytes": 2927,
567
+ "generate_time": 34.328625202178955
568
+ },
569
+ {
570
+ "backend": "transformers",
571
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
572
+ "task": "summary_and_generate",
573
+ "bits": 16,
574
+ "ngpus": 1,
575
+ "reps": 3,
576
+ "date": "08/18/2023 13:31:34",
577
+ "git_sha": "fc4826f2",
578
+ "n_gpus": 1,
579
+ "transformers": "4.30.2",
580
+ "bitsandbytes": "0.41.1",
581
+ "cuda": "11.7",
582
+ "hostname": "rippa",
583
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
584
+ "summarize_input_len_bytes": 857252,
585
+ "summarize_output_len_bytes": 1046,
586
+ "summarize_time": 52.49842747052511,
587
+ "generate_output_len_bytes": 2172,
588
+ "generate_time": 20.686774571736652
589
+ },
590
+ {
591
+ "backend": "transformers",
592
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
593
+ "task": "summary_and_generate",
594
+ "bits": 16,
595
+ "ngpus": 1,
596
+ "reps": 3,
597
+ "date": "08/18/2023 13:31:55",
598
+ "git_sha": "fc4826f2",
599
+ "n_gpus": 1,
600
+ "transformers": "4.30.2",
601
+ "bitsandbytes": "0.41.1",
602
+ "cuda": "11.7",
603
+ "hostname": "timemachine",
604
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
605
+ "exception": "OOM"
606
+ },
607
+ {
608
+ "backend": "transformers",
609
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
610
+ "task": "summary_and_generate",
611
+ "bits": 8,
612
+ "ngpus": 1,
613
+ "reps": 3,
614
+ "date": "08/18/2023 13:35:38",
615
+ "git_sha": "fc4826f2",
616
+ "n_gpus": 1,
617
+ "transformers": "4.30.2",
618
+ "bitsandbytes": "0.41.1",
619
+ "cuda": "11.7",
620
+ "hostname": "rippa",
621
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
622
+ "summarize_input_len_bytes": 857252,
623
+ "summarize_output_len_bytes": 1007,
624
+ "summarize_time": 168.9666860898336,
625
+ "generate_output_len_bytes": 2249,
626
+ "generate_time": 73.25518870353699
627
+ },
628
+ {
629
+ "backend": "transformers",
630
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
631
+ "task": "summary_and_generate",
632
+ "bits": 4,
633
+ "ngpus": 1,
634
+ "reps": 3,
635
+ "date": "08/18/2023 13:48:09",
636
+ "git_sha": "fc4826f2",
637
+ "n_gpus": 1,
638
+ "transformers": "4.30.2",
639
+ "bitsandbytes": "0.41.1",
640
+ "cuda": "11.7",
641
+ "hostname": "rippa",
642
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
643
+ "summarize_input_len_bytes": 857252,
644
+ "summarize_output_len_bytes": 856,
645
+ "summarize_time": 45.30513469378153,
646
+ "generate_output_len_bytes": 1802,
647
+ "generate_time": 22.000216643015545
648
+ },
649
+ {
650
+ "backend": "transformers",
651
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
652
+ "task": "summary_and_generate",
653
+ "bits": 16,
654
+ "ngpus": 2,
655
+ "reps": 3,
656
+ "date": "08/18/2023 13:51:56",
657
+ "git_sha": "fc4826f2",
658
+ "n_gpus": 2,
659
+ "transformers": "4.30.2",
660
+ "bitsandbytes": "0.41.1",
661
+ "cuda": "11.7",
662
+ "hostname": "rippa",
663
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
664
+ "summarize_input_len_bytes": 857252,
665
+ "summarize_output_len_bytes": 1046,
666
+ "summarize_time": 51.64275654157003,
667
+ "generate_output_len_bytes": 2172,
668
+ "generate_time": 20.737667481104534
669
+ },
670
+ {
671
+ "backend": "transformers",
672
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
673
+ "task": "summary_and_generate",
674
+ "bits": 8,
675
+ "ngpus": 1,
676
+ "reps": 3,
677
+ "date": "08/18/2023 13:35:47",
678
+ "git_sha": "fc4826f2",
679
+ "n_gpus": 1,
680
+ "transformers": "4.30.2",
681
+ "bitsandbytes": "0.41.1",
682
+ "cuda": "11.7",
683
+ "hostname": "timemachine",
684
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
685
+ "summarize_input_len_bytes": 857252,
686
+ "summarize_output_len_bytes": 980,
687
+ "summarize_time": 280.4669913450877,
688
+ "generate_output_len_bytes": 2132,
689
+ "generate_time": 141.7793349424998
690
+ },
691
+ {
692
+ "backend": "transformers",
693
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
694
+ "task": "summary_and_generate",
695
+ "bits": 4,
696
+ "ngpus": 1,
697
+ "reps": 3,
698
+ "date": "08/18/2023 13:57:35",
699
+ "git_sha": "fc4826f2",
700
+ "n_gpus": 1,
701
+ "transformers": "4.30.2",
702
+ "bitsandbytes": "0.41.1",
703
+ "cuda": "11.7",
704
+ "hostname": "timemachine",
705
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
706
+ "summarize_input_len_bytes": 857252,
707
+ "summarize_output_len_bytes": 869,
708
+ "summarize_time": 96.61887431144714,
709
+ "generate_output_len_bytes": 3244,
710
+ "generate_time": 82.98751719792683
711
+ },
712
+ {
713
+ "backend": "transformers",
714
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
715
+ "task": "summary_and_generate",
716
+ "bits": 8,
717
+ "ngpus": 2,
718
+ "reps": 3,
719
+ "date": "08/18/2023 13:55:51",
720
+ "git_sha": "fc4826f2",
721
+ "n_gpus": 2,
722
+ "transformers": "4.30.2",
723
+ "bitsandbytes": "0.41.1",
724
+ "cuda": "11.7",
725
+ "hostname": "rippa",
726
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
727
+ "summarize_input_len_bytes": 857252,
728
+ "summarize_output_len_bytes": 1007,
729
+ "summarize_time": 167.52292919158936,
730
+ "generate_output_len_bytes": 2249,
731
+ "generate_time": 71.82611886660258
732
+ },
733
+ {
734
+ "backend": "transformers",
735
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
736
+ "task": "summary_and_generate",
737
+ "bits": 4,
738
+ "ngpus": 2,
739
+ "reps": 3,
740
+ "date": "08/18/2023 14:08:08",
741
+ "git_sha": "fc4826f2",
742
+ "n_gpus": 2,
743
+ "transformers": "4.30.2",
744
+ "bitsandbytes": "0.41.1",
745
+ "cuda": "11.7",
746
+ "hostname": "rippa",
747
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
748
+ "summarize_input_len_bytes": 857252,
749
+ "summarize_output_len_bytes": 856,
750
+ "summarize_time": 47.14254776636759,
751
+ "generate_output_len_bytes": 1802,
752
+ "generate_time": 22.54850967725118
753
+ },
754
+ {
755
+ "backend": "transformers",
756
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
757
+ "task": "summary_and_generate",
758
+ "bits": 16,
759
+ "ngpus": 1,
760
+ "reps": 3,
761
+ "date": "08/18/2023 14:15:15",
762
+ "git_sha": "d13230ee",
763
+ "n_gpus": 1,
764
+ "transformers": "4.30.2",
765
+ "bitsandbytes": "0.41.1",
766
+ "cuda": "11.7",
767
+ "hostname": "rippa",
768
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
769
+ "exception": "OOM"
770
+ },
771
+ {
772
+ "backend": "transformers",
773
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
774
+ "task": "summary_and_generate",
775
+ "bits": 16,
776
+ "ngpus": 2,
777
+ "reps": 3,
778
+ "date": "08/18/2023 14:07:15",
779
+ "git_sha": "fc4826f2",
780
+ "n_gpus": 2,
781
+ "transformers": "4.30.2",
782
+ "bitsandbytes": "0.41.1",
783
+ "cuda": "11.7",
784
+ "hostname": "timemachine",
785
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
786
+ "summarize_input_len_bytes": 857252,
787
+ "summarize_output_len_bytes": 915,
788
+ "summarize_time": 89.59958203633626,
789
+ "generate_output_len_bytes": 2172,
790
+ "generate_time": 42.32424934705099
791
+ },
792
+ {
793
+ "backend": "transformers",
794
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
795
+ "task": "summary_and_generate",
796
+ "bits": 8,
797
+ "ngpus": 1,
798
+ "reps": 3,
799
+ "date": "08/18/2023 14:15:30",
800
+ "git_sha": "d13230ee",
801
+ "n_gpus": 1,
802
+ "transformers": "4.30.2",
803
+ "bitsandbytes": "0.41.1",
804
+ "cuda": "11.7",
805
+ "hostname": "rippa",
806
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
807
+ "summarize_input_len_bytes": 857252,
808
+ "summarize_output_len_bytes": 1024,
809
+ "summarize_time": 185.44230167071024,
810
+ "generate_output_len_bytes": 2122,
811
+ "generate_time": 88.11553311347961
812
+ },
813
+ {
814
+ "backend": "transformers",
815
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
816
+ "task": "summary_and_generate",
817
+ "bits": 4,
818
+ "ngpus": 1,
819
+ "reps": 3,
820
+ "date": "08/18/2023 14:29:36",
821
+ "git_sha": "d13230ee",
822
+ "n_gpus": 1,
823
+ "transformers": "4.30.2",
824
+ "bitsandbytes": "0.41.1",
825
+ "cuda": "11.7",
826
+ "hostname": "rippa",
827
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
828
+ "summarize_input_len_bytes": 857252,
829
+ "summarize_output_len_bytes": 922,
830
+ "summarize_time": 68.06459252039592,
831
+ "generate_output_len_bytes": 1802,
832
+ "generate_time": 27.939613421758015
833
+ },
834
+ {
835
+ "backend": "transformers",
836
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
837
+ "task": "summary_and_generate",
838
+ "bits": 8,
839
+ "ngpus": 2,
840
+ "reps": 3,
841
+ "date": "08/18/2023 14:26:29",
842
+ "git_sha": "d13230ee",
843
+ "n_gpus": 2,
844
+ "transformers": "4.30.2",
845
+ "bitsandbytes": "0.41.1",
846
+ "cuda": "11.7",
847
+ "hostname": "timemachine",
848
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
849
+ "summarize_input_len_bytes": 857252,
850
+ "summarize_output_len_bytes": 980,
851
+ "summarize_time": 280.8310640652974,
852
+ "generate_output_len_bytes": 2132,
853
+ "generate_time": 143.21916349728903
854
+ },
855
+ {
856
+ "backend": "transformers",
857
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
858
+ "task": "summary_and_generate",
859
+ "bits": 4,
860
+ "ngpus": 2,
861
+ "reps": 3,
862
+ "date": "08/18/2023 14:48:17",
863
+ "git_sha": "d13230ee",
864
+ "n_gpus": 2,
865
+ "transformers": "4.30.2",
866
+ "bitsandbytes": "0.41.1",
867
+ "cuda": "11.7",
868
+ "hostname": "timemachine",
869
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
870
+ "summarize_input_len_bytes": 857252,
871
+ "summarize_output_len_bytes": 869,
872
+ "summarize_time": 98.47045453389485,
873
+ "generate_output_len_bytes": 3244,
874
+ "generate_time": 83.71360301971436
875
+ },
876
+ {
877
+ "backend": "transformers",
878
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
879
+ "task": "summary_and_generate",
880
+ "bits": 16,
881
+ "ngpus": 1,
882
+ "reps": 3,
883
+ "date": "08/18/2023 15:35:13",
884
+ "git_sha": "0dec0f52",
885
+ "n_gpus": 1,
886
+ "transformers": "4.30.2",
887
+ "bitsandbytes": "0.41.1",
888
+ "cuda": "11.7",
889
+ "hostname": "rippa",
890
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
891
+ "exception": "OOM"
892
+ },
893
+ {
894
+ "backend": "transformers",
895
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
896
+ "task": "summary_and_generate",
897
+ "bits": 16,
898
+ "ngpus": 1,
899
+ "reps": 3,
900
+ "date": "08/18/2023 15:49:33",
901
+ "git_sha": "0cdb75ef",
902
+ "n_gpus": 1,
903
+ "transformers": "4.30.2",
904
+ "bitsandbytes": "0.41.1",
905
+ "cuda": "11.7",
906
+ "hostname": "timemachine",
907
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
908
+ "exception": "OOM"
909
+ },
910
+ {
911
+ "backend": "transformers",
912
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
913
+ "task": "summary_and_generate",
914
+ "bits": 8,
915
+ "ngpus": 1,
916
+ "reps": 3,
917
+ "date": "08/18/2023 16:26:53",
918
+ "git_sha": "0cdb75ef",
919
+ "n_gpus": 1,
920
+ "transformers": "4.30.2",
921
+ "bitsandbytes": "0.41.1",
922
+ "cuda": "11.7",
923
+ "hostname": "rippa",
924
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
925
+ "exception": "OOM"
926
+ },
927
+ {
928
+ "backend": "transformers",
929
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
930
+ "task": "summary_and_generate",
931
+ "bits": 8,
932
+ "ngpus": 1,
933
+ "reps": 3,
934
+ "date": "08/18/2023 16:27:32",
935
+ "git_sha": "0cdb75ef",
936
+ "n_gpus": 1,
937
+ "transformers": "4.30.2",
938
+ "bitsandbytes": "0.41.1",
939
+ "cuda": "11.7",
940
+ "hostname": "timemachine",
941
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
942
+ "exception": "OOM"
943
+ },
944
+ {
945
+ "backend": "transformers",
946
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
947
+ "task": "summary_and_generate",
948
+ "bits": 4,
949
+ "ngpus": 1,
950
+ "reps": 3,
951
+ "date": "08/18/2023 16:29:03",
952
+ "git_sha": "0cdb75ef",
953
+ "n_gpus": 1,
954
+ "transformers": "4.30.2",
955
+ "bitsandbytes": "0.41.1",
956
+ "cuda": "11.7",
957
+ "hostname": "timemachine",
958
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
959
+ "exception": "OOM"
960
+ },
961
+ {
962
+ "backend": "transformers",
963
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
964
+ "task": "summary_and_generate",
965
+ "bits": 4,
966
+ "ngpus": 2,
967
+ "reps": 3,
968
+ "date": "08/18/2023 17:26:02",
969
+ "git_sha": "0cdb75ef",
970
+ "n_gpus": 2,
971
+ "transformers": "4.30.2",
972
+ "bitsandbytes": "0.41.1",
973
+ "cuda": "11.7",
974
+ "hostname": "timemachine",
975
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
976
+ "exception": "OOM"
977
+ },
978
+ {
979
+ "backend": "text-generation-inference",
980
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
981
+ "task": "summary_and_generate",
982
+ "bits": 16,
983
+ "ngpus": 1,
984
+ "reps": 3,
985
+ "date": "08/18/2023 18:59:16",
986
+ "git_sha": "5691db4a",
987
+ "n_gpus": 1,
988
+ "transformers": "4.30.2",
989
+ "bitsandbytes": "0.41.1",
990
+ "cuda": "11.7",
991
+ "hostname": "rippa",
992
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
993
+ "summarize_input_len_bytes": 857252,
994
+ "summarize_output_len_bytes": 1075,
995
+ "summarize_time": 39.01545596122742,
996
+ "generate_output_len_bytes": 2242,
997
+ "generate_time": 10.151424566904703
998
+ },
999
+ {
1000
+ "backend": "text-generation-inference",
1001
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1002
+ "task": "summary_and_generate",
1003
+ "bits": 16,
1004
+ "ngpus": 1,
1005
+ "reps": 3,
1006
+ "date": "08/18/2023 19:03:13",
1007
+ "git_sha": "5691db4a",
1008
+ "n_gpus": 1,
1009
+ "transformers": "4.30.2",
1010
+ "bitsandbytes": "0.41.1",
1011
+ "cuda": "11.7",
1012
+ "hostname": "rippa",
1013
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
1014
+ "summarize_input_len_bytes": 857252,
1015
+ "summarize_output_len_bytes": 940,
1016
+ "summarize_time": 21.78233750661214,
1017
+ "generate_output_len_bytes": 2130,
1018
+ "generate_time": 15.794983307520548
1019
+ },
1020
+ {
1021
+ "backend": "text-generation-inference",
1022
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1023
+ "task": "summary_and_generate",
1024
+ "bits": 16,
1025
+ "ngpus": 2,
1026
+ "reps": 3,
1027
+ "date": "08/18/2023 19:38:40",
1028
+ "git_sha": "6f05e8f1",
1029
+ "n_gpus": 2,
1030
+ "transformers": "4.30.2",
1031
+ "bitsandbytes": "0.41.1",
1032
+ "cuda": "11.7",
1033
+ "hostname": "rippa",
1034
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
1035
+ "summarize_input_len_bytes": 857252,
1036
+ "summarize_output_len_bytes": 1114,
1037
+ "summarize_time": 7.636120955149333,
1038
+ "generate_output_len_bytes": 2275,
1039
+ "generate_time": 7.922623078028361
1040
+ },
1041
+ {
1042
+ "backend": "text-generation-inference",
1043
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1044
+ "task": "summary_and_generate",
1045
+ "bits": 16,
1046
+ "ngpus": 2,
1047
+ "reps": 3,
1048
+ "date": "08/18/2023 19:41:02",
1049
+ "git_sha": "6f05e8f1",
1050
+ "n_gpus": 2,
1051
+ "transformers": "4.30.2",
1052
+ "bitsandbytes": "0.41.1",
1053
+ "cuda": "11.7",
1054
+ "hostname": "rippa",
1055
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
1056
+ "summarize_input_len_bytes": 857252,
1057
+ "summarize_output_len_bytes": 1024,
1058
+ "summarize_time": 10.824170271555582,
1059
+ "generate_output_len_bytes": 2130,
1060
+ "generate_time": 9.209020694096884
1061
+ },
1062
+ {
1063
+ "backend": "text-generation-inference",
1064
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1065
+ "task": "summary_and_generate",
1066
+ "bits": 16,
1067
+ "ngpus": 1,
1068
+ "reps": 3,
1069
+ "date": "08/18/2023 19:55:17",
1070
+ "git_sha": "2c548f21",
1071
+ "n_gpus": 1,
1072
+ "transformers": "4.30.2",
1073
+ "bitsandbytes": "0.41.1",
1074
+ "cuda": "11.7",
1075
+ "hostname": "rippa",
1076
+ "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)",
1077
+ "summarize_input_len_bytes": 857252,
1078
+ "summarize_output_len_bytes": 1088,
1079
+ "summarize_time": 24.39883820215861,
1080
+ "generate_output_len_bytes": 2275,
1081
+ "generate_time": 12.755743900934855
1082
+ },
1083
+ {
1084
+ "backend": "transformers",
1085
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1086
+ "task": "summary_and_generate",
1087
+ "bits": 16,
1088
+ "ngpus": 1,
1089
+ "reps": 3,
1090
+ "date": "08/19/2023 00:57:21",
1091
+ "git_sha": "a227be4f",
1092
+ "n_gpus": 1,
1093
+ "transformers": "4.30.2",
1094
+ "bitsandbytes": "0.41.1",
1095
+ "cuda": "11.8",
1096
+ "hostname": "cloudvm",
1097
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1098
+ "summarize_input_len_bytes": 857252,
1099
+ "summarize_output_len_bytes": 1267,
1100
+ "summarize_time": 37.113919814427696,
1101
+ "generate_output_len_bytes": 2384,
1102
+ "generate_time": 18.36507821083069
1103
+ },
1104
+ {
1105
+ "backend": "transformers",
1106
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1107
+ "task": "summary_and_generate",
1108
+ "bits": 16,
1109
+ "ngpus": 1,
1110
+ "reps": 3,
1111
+ "date": "08/19/2023 01:00:31",
1112
+ "git_sha": "a227be4f",
1113
+ "n_gpus": 1,
1114
+ "transformers": "4.30.2",
1115
+ "bitsandbytes": "0.41.1",
1116
+ "cuda": "11.8",
1117
+ "hostname": "cloudvm",
1118
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1119
+ "summarize_input_len_bytes": 857252,
1120
+ "summarize_output_len_bytes": 1046,
1121
+ "summarize_time": 49.79721482594808,
1122
+ "generate_output_len_bytes": 2172,
1123
+ "generate_time": 21.780913591384888
1124
+ },
1125
+ {
1126
+ "backend": "transformers",
1127
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
1128
+ "task": "summary_and_generate",
1129
+ "bits": 16,
1130
+ "ngpus": 1,
1131
+ "reps": 3,
1132
+ "date": "08/19/2023 01:04:36",
1133
+ "git_sha": "a227be4f",
1134
+ "n_gpus": 1,
1135
+ "transformers": "4.30.2",
1136
+ "bitsandbytes": "0.41.1",
1137
+ "cuda": "11.8",
1138
+ "hostname": "cloudvm",
1139
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1140
+ "exception": "OOM"
1141
+ },
1142
+ {
1143
+ "backend": "transformers",
1144
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1145
+ "task": "summary_and_generate",
1146
+ "bits": 8,
1147
+ "ngpus": 1,
1148
+ "reps": 3,
1149
+ "date": "08/19/2023 01:05:26",
1150
+ "git_sha": "a227be4f",
1151
+ "n_gpus": 1,
1152
+ "transformers": "4.30.2",
1153
+ "bitsandbytes": "0.41.1",
1154
+ "cuda": "11.8",
1155
+ "hostname": "cloudvm",
1156
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1157
+ "summarize_input_len_bytes": 857252,
1158
+ "summarize_output_len_bytes": 1179,
1159
+ "summarize_time": 181.2461258570353,
1160
+ "generate_output_len_bytes": 2772,
1161
+ "generate_time": 92.64811905225118
1162
+ },
1163
+ {
1164
+ "backend": "transformers",
1165
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1166
+ "task": "summary_and_generate",
1167
+ "bits": 8,
1168
+ "ngpus": 1,
1169
+ "reps": 3,
1170
+ "date": "08/19/2023 01:19:33",
1171
+ "git_sha": "a227be4f",
1172
+ "n_gpus": 1,
1173
+ "transformers": "4.30.2",
1174
+ "bitsandbytes": "0.41.1",
1175
+ "cuda": "11.8",
1176
+ "hostname": "cloudvm",
1177
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1178
+ "summarize_input_len_bytes": 857252,
1179
+ "summarize_output_len_bytes": 800,
1180
+ "summarize_time": 174.4576851526896,
1181
+ "generate_output_len_bytes": 2713,
1182
+ "generate_time": 119.14412077267964
1183
+ },
1184
+ {
1185
+ "backend": "transformers",
1186
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1187
+ "task": "summary_and_generate",
1188
+ "bits": 4,
1189
+ "ngpus": 1,
1190
+ "reps": 3,
1191
+ "date": "08/19/2023 01:36:14",
1192
+ "git_sha": "a227be4f",
1193
+ "n_gpus": 1,
1194
+ "transformers": "4.30.2",
1195
+ "bitsandbytes": "0.41.1",
1196
+ "cuda": "11.8",
1197
+ "hostname": "cloudvm",
1198
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1199
+ "summarize_input_len_bytes": 857252,
1200
+ "summarize_output_len_bytes": 1002,
1201
+ "summarize_time": 53.39731526374817,
1202
+ "generate_output_len_bytes": 2927,
1203
+ "generate_time": 31.369641542434692
1204
+ },
1205
+ {
1206
+ "backend": "transformers",
1207
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1208
+ "task": "summary_and_generate",
1209
+ "bits": 4,
1210
+ "ngpus": 1,
1211
+ "reps": 3,
1212
+ "date": "08/19/2023 01:40:53",
1213
+ "git_sha": "a227be4f",
1214
+ "n_gpus": 1,
1215
+ "transformers": "4.30.2",
1216
+ "bitsandbytes": "0.41.1",
1217
+ "cuda": "11.8",
1218
+ "hostname": "cloudvm",
1219
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1220
+ "summarize_input_len_bytes": 857252,
1221
+ "summarize_output_len_bytes": 1000,
1222
+ "summarize_time": 74.27096923192342,
1223
+ "generate_output_len_bytes": 1802,
1224
+ "generate_time": 29.860486666361492
1225
+ },
1226
+ {
1227
+ "backend": "transformers",
1228
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1229
+ "task": "summary_and_generate",
1230
+ "bits": 16,
1231
+ "ngpus": 2,
1232
+ "reps": 3,
1233
+ "date": "08/19/2023 01:48:09",
1234
+ "git_sha": "a227be4f",
1235
+ "n_gpus": 2,
1236
+ "transformers": "4.30.2",
1237
+ "bitsandbytes": "0.41.1",
1238
+ "cuda": "11.8",
1239
+ "hostname": "cloudvm",
1240
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1241
+ "summarize_input_len_bytes": 857252,
1242
+ "summarize_output_len_bytes": 1267,
1243
+ "summarize_time": 39.926851193110146,
1244
+ "generate_output_len_bytes": 2384,
1245
+ "generate_time": 18.481745958328247
1246
+ },
1247
+ {
1248
+ "backend": "transformers",
1249
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1250
+ "task": "summary_and_generate",
1251
+ "bits": 16,
1252
+ "ngpus": 2,
1253
+ "reps": 3,
1254
+ "date": "08/19/2023 01:51:27",
1255
+ "git_sha": "a227be4f",
1256
+ "n_gpus": 2,
1257
+ "transformers": "4.30.2",
1258
+ "bitsandbytes": "0.41.1",
1259
+ "cuda": "11.8",
1260
+ "hostname": "cloudvm",
1261
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1262
+ "summarize_input_len_bytes": 857252,
1263
+ "summarize_output_len_bytes": 1046,
1264
+ "summarize_time": 51.299002488454185,
1265
+ "generate_output_len_bytes": 2172,
1266
+ "generate_time": 21.828503131866455
1267
+ },
1268
+ {
1269
+ "backend": "transformers",
1270
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1271
+ "task": "summary_and_generate",
1272
+ "bits": 8,
1273
+ "ngpus": 2,
1274
+ "reps": 3,
1275
+ "date": "08/19/2023 01:56:20",
1276
+ "git_sha": "a227be4f",
1277
+ "n_gpus": 2,
1278
+ "transformers": "4.30.2",
1279
+ "bitsandbytes": "0.41.1",
1280
+ "cuda": "11.8",
1281
+ "hostname": "cloudvm",
1282
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1283
+ "summarize_input_len_bytes": 857252,
1284
+ "summarize_output_len_bytes": 1179,
1285
+ "summarize_time": 178.19972308476767,
1286
+ "generate_output_len_bytes": 2772,
1287
+ "generate_time": 91.73426882425944
1288
+ },
1289
+ {
1290
+ "backend": "transformers",
1291
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1292
+ "task": "summary_and_generate",
1293
+ "bits": 8,
1294
+ "ngpus": 2,
1295
+ "reps": 3,
1296
+ "date": "08/19/2023 02:10:13",
1297
+ "git_sha": "a227be4f",
1298
+ "n_gpus": 2,
1299
+ "transformers": "4.30.2",
1300
+ "bitsandbytes": "0.41.1",
1301
+ "cuda": "11.8",
1302
+ "hostname": "cloudvm",
1303
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1304
+ "summarize_input_len_bytes": 857252,
1305
+ "summarize_output_len_bytes": 800,
1306
+ "summarize_time": 180.7814578215281,
1307
+ "generate_output_len_bytes": 2713,
1308
+ "generate_time": 124.72717420260112
1309
+ },
1310
+ {
1311
+ "backend": "transformers",
1312
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1313
+ "task": "summary_and_generate",
1314
+ "bits": 4,
1315
+ "ngpus": 2,
1316
+ "reps": 3,
1317
+ "date": "08/19/2023 02:26:43",
1318
+ "git_sha": "a227be4f",
1319
+ "n_gpus": 2,
1320
+ "transformers": "4.30.2",
1321
+ "bitsandbytes": "0.41.1",
1322
+ "cuda": "11.8",
1323
+ "hostname": "cloudvm",
1324
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1325
+ "summarize_input_len_bytes": 857252,
1326
+ "summarize_output_len_bytes": 1002,
1327
+ "summarize_time": 57.08081785837809,
1328
+ "generate_output_len_bytes": 2927,
1329
+ "generate_time": 32.26534946759542
1330
+ },
1331
+ {
1332
+ "backend": "transformers",
1333
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1334
+ "task": "summary_and_generate",
1335
+ "bits": 4,
1336
+ "ngpus": 2,
1337
+ "reps": 3,
1338
+ "date": "08/19/2023 02:31:36",
1339
+ "git_sha": "a227be4f",
1340
+ "n_gpus": 2,
1341
+ "transformers": "4.30.2",
1342
+ "bitsandbytes": "0.41.1",
1343
+ "cuda": "11.8",
1344
+ "hostname": "cloudvm",
1345
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1346
+ "summarize_input_len_bytes": 857252,
1347
+ "summarize_output_len_bytes": 1000,
1348
+ "summarize_time": 79.9461121559143,
1349
+ "generate_output_len_bytes": 1802,
1350
+ "generate_time": 31.403561115264893
1351
+ },
1352
+ {
1353
+ "backend": "transformers",
1354
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1355
+ "task": "summary_and_generate",
1356
+ "bits": 16,
1357
+ "ngpus": 4,
1358
+ "reps": 3,
1359
+ "date": "08/19/2023 02:38:23",
1360
+ "git_sha": "a227be4f",
1361
+ "n_gpus": 4,
1362
+ "transformers": "4.30.2",
1363
+ "bitsandbytes": "0.41.1",
1364
+ "cuda": "11.8",
1365
+ "hostname": "cloudvm",
1366
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1367
+ "summarize_input_len_bytes": 857252,
1368
+ "summarize_output_len_bytes": 1267,
1369
+ "summarize_time": 42.33977222442627,
1370
+ "generate_output_len_bytes": 2384,
1371
+ "generate_time": 19.723278522491455
1372
+ },
1373
+ {
1374
+ "backend": "transformers",
1375
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1376
+ "task": "summary_and_generate",
1377
+ "bits": 16,
1378
+ "ngpus": 4,
1379
+ "reps": 3,
1380
+ "date": "08/19/2023 02:41:52",
1381
+ "git_sha": "a227be4f",
1382
+ "n_gpus": 4,
1383
+ "transformers": "4.30.2",
1384
+ "bitsandbytes": "0.41.1",
1385
+ "cuda": "11.8",
1386
+ "hostname": "cloudvm",
1387
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1388
+ "summarize_input_len_bytes": 857252,
1389
+ "summarize_output_len_bytes": 1046,
1390
+ "summarize_time": 55.377869288126625,
1391
+ "generate_output_len_bytes": 2172,
1392
+ "generate_time": 25.01458676656087
1393
+ },
1394
+ {
1395
+ "backend": "transformers",
1396
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1397
+ "task": "summary_and_generate",
1398
+ "bits": 8,
1399
+ "ngpus": 4,
1400
+ "reps": 3,
1401
+ "date": "08/19/2023 02:47:05",
1402
+ "git_sha": "a227be4f",
1403
+ "n_gpus": 4,
1404
+ "transformers": "4.30.2",
1405
+ "bitsandbytes": "0.41.1",
1406
+ "cuda": "11.8",
1407
+ "hostname": "cloudvm",
1408
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1409
+ "summarize_input_len_bytes": 857252,
1410
+ "summarize_output_len_bytes": 1179,
1411
+ "summarize_time": 180.53432401021323,
1412
+ "generate_output_len_bytes": 2772,
1413
+ "generate_time": 91.93375285466512
1414
+ },
1415
+ {
1416
+ "backend": "transformers",
1417
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1418
+ "task": "summary_and_generate",
1419
+ "bits": 8,
1420
+ "ngpus": 4,
1421
+ "reps": 3,
1422
+ "date": "08/19/2023 03:01:07",
1423
+ "git_sha": "a227be4f",
1424
+ "n_gpus": 4,
1425
+ "transformers": "4.30.2",
1426
+ "bitsandbytes": "0.41.1",
1427
+ "cuda": "11.8",
1428
+ "hostname": "cloudvm",
1429
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1430
+ "summarize_input_len_bytes": 857252,
1431
+ "summarize_output_len_bytes": 800,
1432
+ "summarize_time": 179.50477250417075,
1433
+ "generate_output_len_bytes": 2713,
1434
+ "generate_time": 124.40728378295898
1435
+ },
1436
+ {
1437
+ "backend": "transformers",
1438
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1439
+ "task": "summary_and_generate",
1440
+ "bits": 4,
1441
+ "ngpus": 4,
1442
+ "reps": 3,
1443
+ "date": "08/19/2023 03:17:36",
1444
+ "git_sha": "a227be4f",
1445
+ "n_gpus": 4,
1446
+ "transformers": "4.30.2",
1447
+ "bitsandbytes": "0.41.1",
1448
+ "cuda": "11.8",
1449
+ "hostname": "cloudvm",
1450
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1451
+ "summarize_input_len_bytes": 857252,
1452
+ "summarize_output_len_bytes": 1002,
1453
+ "summarize_time": 58.62867816289266,
1454
+ "generate_output_len_bytes": 2927,
1455
+ "generate_time": 33.394495725631714
1456
+ },
1457
+ {
1458
+ "backend": "transformers",
1459
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1460
+ "task": "summary_and_generate",
1461
+ "bits": 4,
1462
+ "ngpus": 4,
1463
+ "reps": 3,
1464
+ "date": "08/19/2023 03:22:37",
1465
+ "git_sha": "a227be4f",
1466
+ "n_gpus": 4,
1467
+ "transformers": "4.30.2",
1468
+ "bitsandbytes": "0.41.1",
1469
+ "cuda": "11.8",
1470
+ "hostname": "cloudvm",
1471
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1472
+ "summarize_input_len_bytes": 857252,
1473
+ "summarize_output_len_bytes": 1000,
1474
+ "summarize_time": 78.90612125396729,
1475
+ "generate_output_len_bytes": 1802,
1476
+ "generate_time": 30.697617371877033
1477
+ },
1478
+ {
1479
+ "backend": "transformers",
1480
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1481
+ "task": "summary_and_generate",
1482
+ "bits": 16,
1483
+ "ngpus": 8,
1484
+ "reps": 3,
1485
+ "date": "08/19/2023 03:29:20",
1486
+ "git_sha": "a227be4f",
1487
+ "n_gpus": 8,
1488
+ "transformers": "4.30.2",
1489
+ "bitsandbytes": "0.41.1",
1490
+ "cuda": "11.8",
1491
+ "hostname": "cloudvm",
1492
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1493
+ "summarize_input_len_bytes": 857252,
1494
+ "summarize_output_len_bytes": 1267,
1495
+ "summarize_time": 40.498607873916626,
1496
+ "generate_output_len_bytes": 2384,
1497
+ "generate_time": 19.509677171707153
1498
+ },
1499
+ {
1500
+ "backend": "transformers",
1501
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1502
+ "task": "summary_and_generate",
1503
+ "bits": 16,
1504
+ "ngpus": 8,
1505
+ "reps": 3,
1506
+ "date": "08/19/2023 03:32:44",
1507
+ "git_sha": "a227be4f",
1508
+ "n_gpus": 8,
1509
+ "transformers": "4.30.2",
1510
+ "bitsandbytes": "0.41.1",
1511
+ "cuda": "11.8",
1512
+ "hostname": "cloudvm",
1513
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1514
+ "summarize_input_len_bytes": 857252,
1515
+ "summarize_output_len_bytes": 1046,
1516
+ "summarize_time": 55.3964786529541,
1517
+ "generate_output_len_bytes": 2172,
1518
+ "generate_time": 24.347585439682007
1519
+ },
1520
+ {
1521
+ "backend": "transformers",
1522
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1523
+ "task": "summary_and_generate",
1524
+ "bits": 8,
1525
+ "ngpus": 8,
1526
+ "reps": 3,
1527
+ "date": "08/19/2023 03:37:55",
1528
+ "git_sha": "a227be4f",
1529
+ "n_gpus": 8,
1530
+ "transformers": "4.30.2",
1531
+ "bitsandbytes": "0.41.1",
1532
+ "cuda": "11.8",
1533
+ "hostname": "cloudvm",
1534
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1535
+ "summarize_input_len_bytes": 857252,
1536
+ "summarize_output_len_bytes": 1179,
1537
+ "summarize_time": 186.71331850687662,
1538
+ "generate_output_len_bytes": 2772,
1539
+ "generate_time": 95.784650405248
1540
+ },
1541
+ {
1542
+ "backend": "transformers",
1543
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1544
+ "task": "summary_and_generate",
1545
+ "bits": 8,
1546
+ "ngpus": 8,
1547
+ "reps": 3,
1548
+ "date": "08/19/2023 03:52:28",
1549
+ "git_sha": "a227be4f",
1550
+ "n_gpus": 8,
1551
+ "transformers": "4.30.2",
1552
+ "bitsandbytes": "0.41.1",
1553
+ "cuda": "11.8",
1554
+ "hostname": "cloudvm",
1555
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1556
+ "summarize_input_len_bytes": 857252,
1557
+ "summarize_output_len_bytes": 800,
1558
+ "summarize_time": 185.3280005455017,
1559
+ "generate_output_len_bytes": 2713,
1560
+ "generate_time": 125.91738017400105
1561
+ },
1562
+ {
1563
+ "backend": "transformers",
1564
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1565
+ "task": "summary_and_generate",
1566
+ "bits": 4,
1567
+ "ngpus": 8,
1568
+ "reps": 3,
1569
+ "date": "08/19/2023 04:09:18",
1570
+ "git_sha": "a227be4f",
1571
+ "n_gpus": 8,
1572
+ "transformers": "4.30.2",
1573
+ "bitsandbytes": "0.41.1",
1574
+ "cuda": "11.8",
1575
+ "hostname": "cloudvm",
1576
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1577
+ "summarize_input_len_bytes": 857252,
1578
+ "summarize_output_len_bytes": 1002,
1579
+ "summarize_time": 60.18280680974325,
1580
+ "generate_output_len_bytes": 2927,
1581
+ "generate_time": 33.386961142222084
1582
+ },
1583
+ {
1584
+ "backend": "transformers",
1585
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1586
+ "task": "summary_and_generate",
1587
+ "bits": 4,
1588
+ "ngpus": 8,
1589
+ "reps": 3,
1590
+ "date": "08/19/2023 04:14:25",
1591
+ "git_sha": "a227be4f",
1592
+ "n_gpus": 8,
1593
+ "transformers": "4.30.2",
1594
+ "bitsandbytes": "0.41.1",
1595
+ "cuda": "11.8",
1596
+ "hostname": "cloudvm",
1597
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
1598
+ "summarize_input_len_bytes": 857252,
1599
+ "summarize_output_len_bytes": 1000,
1600
+ "summarize_time": 83.04790727297465,
1601
+ "generate_output_len_bytes": 1802,
1602
+ "generate_time": 32.24992283185323
1603
+ },
1604
+ {
1605
+ "backend": "transformers",
1606
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1607
+ "task": "summary_and_generate",
1608
+ "bits": 16,
1609
+ "ngpus": 1,
1610
+ "reps": 3,
1611
+ "date": "08/18/2023 23:26:19",
1612
+ "git_sha": "0cdb75ef",
1613
+ "n_gpus": 1,
1614
+ "transformers": "4.30.2",
1615
+ "bitsandbytes": "0.41.1",
1616
+ "cuda": "11.7",
1617
+ "hostname": "recypabaszmhhmuae",
1618
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1619
+ "summarize_input_len_bytes": 857252,
1620
+ "summarize_output_len_bytes": 1417,
1621
+ "summarize_time": 47.03754989306132,
1622
+ "generate_output_len_bytes": 2384,
1623
+ "generate_time": 19.964784463246662
1624
+ },
1625
+ {
1626
+ "backend": "transformers",
1627
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1628
+ "task": "summary_and_generate",
1629
+ "bits": 16,
1630
+ "ngpus": 1,
1631
+ "reps": 3,
1632
+ "date": "08/18/2023 23:33:09",
1633
+ "git_sha": "0cdb75ef",
1634
+ "n_gpus": 1,
1635
+ "transformers": "4.30.2",
1636
+ "bitsandbytes": "0.41.1",
1637
+ "cuda": "11.7",
1638
+ "hostname": "recypabaszmhhmuae",
1639
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1640
+ "summarize_input_len_bytes": 857252,
1641
+ "summarize_output_len_bytes": 915,
1642
+ "summarize_time": 71.91136892636617,
1643
+ "generate_output_len_bytes": 2480,
1644
+ "generate_time": 33.6295014222463
1645
+ },
1646
+ {
1647
+ "backend": "transformers",
1648
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
1649
+ "task": "summary_and_generate",
1650
+ "bits": 16,
1651
+ "ngpus": 1,
1652
+ "reps": 3,
1653
+ "date": "08/18/2023 23:44:08",
1654
+ "git_sha": "0cdb75ef",
1655
+ "n_gpus": 1,
1656
+ "transformers": "4.30.2",
1657
+ "bitsandbytes": "0.41.1",
1658
+ "cuda": "11.7",
1659
+ "hostname": "recypabaszmhhmuae",
1660
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1661
+ "exception": "OOM"
1662
+ },
1663
+ {
1664
+ "backend": "transformers",
1665
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1666
+ "task": "summary_and_generate",
1667
+ "bits": 8,
1668
+ "ngpus": 1,
1669
+ "reps": 3,
1670
+ "date": "08/19/2023 00:45:42",
1671
+ "git_sha": "0cdb75ef",
1672
+ "n_gpus": 1,
1673
+ "transformers": "4.30.2",
1674
+ "bitsandbytes": "0.41.1",
1675
+ "cuda": "11.7",
1676
+ "hostname": "recypabaszmhhmuae",
1677
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1678
+ "summarize_input_len_bytes": 857252,
1679
+ "summarize_output_len_bytes": 1007,
1680
+ "summarize_time": 148.61560583114624,
1681
+ "generate_output_len_bytes": 2357,
1682
+ "generate_time": 89.01266026496887
1683
+ },
1684
+ {
1685
+ "backend": "transformers",
1686
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1687
+ "task": "summary_and_generate",
1688
+ "bits": 8,
1689
+ "ngpus": 1,
1690
+ "reps": 3,
1691
+ "date": "08/19/2023 00:58:00",
1692
+ "git_sha": "0cdb75ef",
1693
+ "n_gpus": 1,
1694
+ "transformers": "4.30.2",
1695
+ "bitsandbytes": "0.41.1",
1696
+ "cuda": "11.7",
1697
+ "hostname": "recypabaszmhhmuae",
1698
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1699
+ "summarize_input_len_bytes": 857252,
1700
+ "summarize_output_len_bytes": 763,
1701
+ "summarize_time": 193.99270629882812,
1702
+ "generate_output_len_bytes": 2129,
1703
+ "generate_time": 95.66660761833191
1704
+ },
1705
+ {
1706
+ "backend": "transformers",
1707
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
1708
+ "task": "summary_and_generate",
1709
+ "bits": 8,
1710
+ "ngpus": 1,
1711
+ "reps": 3,
1712
+ "date": "08/19/2023 01:13:01",
1713
+ "git_sha": "0cdb75ef",
1714
+ "n_gpus": 1,
1715
+ "transformers": "4.30.2",
1716
+ "bitsandbytes": "0.41.1",
1717
+ "cuda": "11.7",
1718
+ "hostname": "recypabaszmhhmuae",
1719
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1720
+ "exception": "OOM"
1721
+ },
1722
+ {
1723
+ "backend": "transformers",
1724
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1725
+ "task": "summary_and_generate",
1726
+ "bits": 4,
1727
+ "ngpus": 1,
1728
+ "reps": 3,
1729
+ "date": "08/19/2023 01:13:55",
1730
+ "git_sha": "0cdb75ef",
1731
+ "n_gpus": 1,
1732
+ "transformers": "4.30.2",
1733
+ "bitsandbytes": "0.41.1",
1734
+ "cuda": "11.7",
1735
+ "hostname": "recypabaszmhhmuae",
1736
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1737
+ "summarize_input_len_bytes": 857252,
1738
+ "summarize_output_len_bytes": 991,
1739
+ "summarize_time": 61.52411222457886,
1740
+ "generate_output_len_bytes": 2927,
1741
+ "generate_time": 32.030215660730995
1742
+ },
1743
+ {
1744
+ "backend": "transformers",
1745
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1746
+ "task": "summary_and_generate",
1747
+ "bits": 4,
1748
+ "ngpus": 1,
1749
+ "reps": 3,
1750
+ "date": "08/19/2023 01:19:00",
1751
+ "git_sha": "0cdb75ef",
1752
+ "n_gpus": 1,
1753
+ "transformers": "4.30.2",
1754
+ "bitsandbytes": "0.41.1",
1755
+ "cuda": "11.7",
1756
+ "hostname": "recypabaszmhhmuae",
1757
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
1758
+ "summarize_input_len_bytes": 857252,
1759
+ "summarize_output_len_bytes": 1000,
1760
+ "summarize_time": 81.13888708750407,
1761
+ "generate_output_len_bytes": 3486,
1762
+ "generate_time": 55.5331826210022
1763
+ },
1764
+ {
1765
+ "backend": "transformers",
1766
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1767
+ "task": "summary_and_generate",
1768
+ "bits": 16,
1769
+ "ngpus": 2,
1770
+ "reps": 3,
1771
+ "date": "08/19/2023 01:27:49",
1772
+ "git_sha": "0cdb75ef",
1773
+ "n_gpus": 2,
1774
+ "transformers": "4.30.2",
1775
+ "bitsandbytes": "0.41.1",
1776
+ "cuda": "11.7",
1777
+ "hostname": "recypabaszmhhmuae",
1778
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1779
+ "summarize_input_len_bytes": 857252,
1780
+ "summarize_output_len_bytes": 1417,
1781
+ "summarize_time": 47.41046245892843,
1782
+ "generate_output_len_bytes": 2384,
1783
+ "generate_time": 20.660600344340008
1784
+ },
1785
+ {
1786
+ "backend": "transformers",
1787
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1788
+ "task": "summary_and_generate",
1789
+ "bits": 16,
1790
+ "ngpus": 2,
1791
+ "reps": 3,
1792
+ "date": "08/19/2023 01:34:28",
1793
+ "git_sha": "0cdb75ef",
1794
+ "n_gpus": 2,
1795
+ "transformers": "4.30.2",
1796
+ "bitsandbytes": "0.41.1",
1797
+ "cuda": "11.7",
1798
+ "hostname": "recypabaszmhhmuae",
1799
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1800
+ "summarize_input_len_bytes": 857252,
1801
+ "summarize_output_len_bytes": 915,
1802
+ "summarize_time": 72.85646979014079,
1803
+ "generate_output_len_bytes": 2480,
1804
+ "generate_time": 34.05861854553223
1805
+ },
1806
+ {
1807
+ "backend": "transformers",
1808
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1809
+ "task": "summary_and_generate",
1810
+ "bits": 8,
1811
+ "ngpus": 2,
1812
+ "reps": 3,
1813
+ "date": "08/19/2023 02:39:22",
1814
+ "git_sha": "0cdb75ef",
1815
+ "n_gpus": 2,
1816
+ "transformers": "4.30.2",
1817
+ "bitsandbytes": "0.41.1",
1818
+ "cuda": "11.7",
1819
+ "hostname": "recypabaszmhhmuae",
1820
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1821
+ "summarize_input_len_bytes": 857252,
1822
+ "summarize_output_len_bytes": 1007,
1823
+ "summarize_time": 152.54357608159384,
1824
+ "generate_output_len_bytes": 2357,
1825
+ "generate_time": 91.51808977127075
1826
+ },
1827
+ {
1828
+ "backend": "transformers",
1829
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1830
+ "task": "summary_and_generate",
1831
+ "bits": 8,
1832
+ "ngpus": 2,
1833
+ "reps": 3,
1834
+ "date": "08/19/2023 02:52:58",
1835
+ "git_sha": "0cdb75ef",
1836
+ "n_gpus": 2,
1837
+ "transformers": "4.30.2",
1838
+ "bitsandbytes": "0.41.1",
1839
+ "cuda": "11.7",
1840
+ "hostname": "recypabaszmhhmuae",
1841
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1842
+ "summarize_input_len_bytes": 857252,
1843
+ "summarize_output_len_bytes": 763,
1844
+ "summarize_time": 195.92926557858786,
1845
+ "generate_output_len_bytes": 2129,
1846
+ "generate_time": 96.55542047818501
1847
+ },
1848
+ {
1849
+ "backend": "transformers",
1850
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1851
+ "task": "summary_and_generate",
1852
+ "bits": 4,
1853
+ "ngpus": 2,
1854
+ "reps": 3,
1855
+ "date": "08/19/2023 03:15:01",
1856
+ "git_sha": "0cdb75ef",
1857
+ "n_gpus": 2,
1858
+ "transformers": "4.30.2",
1859
+ "bitsandbytes": "0.41.1",
1860
+ "cuda": "11.7",
1861
+ "hostname": "recypabaszmhhmuae",
1862
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1863
+ "summarize_input_len_bytes": 857252,
1864
+ "summarize_output_len_bytes": 991,
1865
+ "summarize_time": 64.64422671000163,
1866
+ "generate_output_len_bytes": 2927,
1867
+ "generate_time": 33.30378039677938
1868
+ },
1869
+ {
1870
+ "backend": "transformers",
1871
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1872
+ "task": "summary_and_generate",
1873
+ "bits": 4,
1874
+ "ngpus": 2,
1875
+ "reps": 3,
1876
+ "date": "08/19/2023 03:20:19",
1877
+ "git_sha": "0cdb75ef",
1878
+ "n_gpus": 2,
1879
+ "transformers": "4.30.2",
1880
+ "bitsandbytes": "0.41.1",
1881
+ "cuda": "11.7",
1882
+ "hostname": "recypabaszmhhmuae",
1883
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
1884
+ "summarize_input_len_bytes": 857252,
1885
+ "summarize_output_len_bytes": 1000,
1886
+ "summarize_time": 84.57761120796204,
1887
+ "generate_output_len_bytes": 3486,
1888
+ "generate_time": 57.59072462717692
1889
+ },
1890
+ {
1891
+ "backend": "transformers",
1892
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1893
+ "task": "summary_and_generate",
1894
+ "bits": 16,
1895
+ "ngpus": 4,
1896
+ "reps": 3,
1897
+ "date": "08/19/2023 03:28:44",
1898
+ "git_sha": "0cdb75ef",
1899
+ "n_gpus": 4,
1900
+ "transformers": "4.30.2",
1901
+ "bitsandbytes": "0.41.1",
1902
+ "cuda": "11.7",
1903
+ "hostname": "recypabaszmhhmuae",
1904
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
1905
+ "summarize_input_len_bytes": 857252,
1906
+ "summarize_output_len_bytes": 1417,
1907
+ "summarize_time": 49.08898218472799,
1908
+ "generate_output_len_bytes": 2384,
1909
+ "generate_time": 21.489527861277264
1910
+ },
1911
+ {
1912
+ "backend": "transformers",
1913
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1914
+ "task": "summary_and_generate",
1915
+ "bits": 16,
1916
+ "ngpus": 4,
1917
+ "reps": 3,
1918
+ "date": "08/19/2023 03:32:39",
1919
+ "git_sha": "0cdb75ef",
1920
+ "n_gpus": 4,
1921
+ "transformers": "4.30.2",
1922
+ "bitsandbytes": "0.41.1",
1923
+ "cuda": "11.7",
1924
+ "hostname": "recypabaszmhhmuae",
1925
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
1926
+ "summarize_input_len_bytes": 857252,
1927
+ "summarize_output_len_bytes": 915,
1928
+ "summarize_time": 74.43774898846944,
1929
+ "generate_output_len_bytes": 2480,
1930
+ "generate_time": 34.72673638661703
1931
+ },
1932
+ {
1933
+ "backend": "transformers",
1934
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1935
+ "task": "summary_and_generate",
1936
+ "bits": 8,
1937
+ "ngpus": 4,
1938
+ "reps": 3,
1939
+ "date": "08/19/2023 03:39:21",
1940
+ "git_sha": "0cdb75ef",
1941
+ "n_gpus": 4,
1942
+ "transformers": "4.30.2",
1943
+ "bitsandbytes": "0.41.1",
1944
+ "cuda": "11.7",
1945
+ "hostname": "recypabaszmhhmuae",
1946
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
1947
+ "summarize_input_len_bytes": 857252,
1948
+ "summarize_output_len_bytes": 1007,
1949
+ "summarize_time": 153.41076453526816,
1950
+ "generate_output_len_bytes": 2357,
1951
+ "generate_time": 91.14894040425618
1952
+ },
1953
+ {
1954
+ "backend": "transformers",
1955
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1956
+ "task": "summary_and_generate",
1957
+ "bits": 8,
1958
+ "ngpus": 4,
1959
+ "reps": 3,
1960
+ "date": "08/19/2023 03:52:00",
1961
+ "git_sha": "0cdb75ef",
1962
+ "n_gpus": 4,
1963
+ "transformers": "4.30.2",
1964
+ "bitsandbytes": "0.41.1",
1965
+ "cuda": "11.7",
1966
+ "hostname": "recypabaszmhhmuae",
1967
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
1968
+ "summarize_input_len_bytes": 857252,
1969
+ "summarize_output_len_bytes": 763,
1970
+ "summarize_time": 199.79869039853415,
1971
+ "generate_output_len_bytes": 2129,
1972
+ "generate_time": 98.61504419644673
1973
+ },
1974
+ {
1975
+ "backend": "transformers",
1976
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
1977
+ "task": "summary_and_generate",
1978
+ "bits": 4,
1979
+ "ngpus": 4,
1980
+ "reps": 3,
1981
+ "date": "08/19/2023 04:08:12",
1982
+ "git_sha": "0cdb75ef",
1983
+ "n_gpus": 4,
1984
+ "transformers": "4.30.2",
1985
+ "bitsandbytes": "0.41.1",
1986
+ "cuda": "11.7",
1987
+ "hostname": "recypabaszmhhmuae",
1988
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
1989
+ "summarize_input_len_bytes": 857252,
1990
+ "summarize_output_len_bytes": 991,
1991
+ "summarize_time": 66.49260465304057,
1992
+ "generate_output_len_bytes": 2927,
1993
+ "generate_time": 34.17951035499573
1994
+ },
1995
+ {
1996
+ "backend": "transformers",
1997
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
1998
+ "task": "summary_and_generate",
1999
+ "bits": 4,
2000
+ "ngpus": 4,
2001
+ "reps": 3,
2002
+ "date": "08/19/2023 04:13:39",
2003
+ "git_sha": "0cdb75ef",
2004
+ "n_gpus": 4,
2005
+ "transformers": "4.30.2",
2006
+ "bitsandbytes": "0.41.1",
2007
+ "cuda": "11.7",
2008
+ "hostname": "recypabaszmhhmuae",
2009
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2010
+ "summarize_input_len_bytes": 857252,
2011
+ "summarize_output_len_bytes": 1000,
2012
+ "summarize_time": 87.65787092844646,
2013
+ "generate_output_len_bytes": 3486,
2014
+ "generate_time": 59.3750696182251
2015
+ },
2016
+ {
2017
+ "backend": "transformers",
2018
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2019
+ "task": "summary_and_generate",
2020
+ "bits": 4,
2021
+ "ngpus": 1,
2022
+ "reps": 3,
2023
+ "date": "08/18/2023 22:22:24",
2024
+ "git_sha": "b63768c6",
2025
+ "n_gpus": 1,
2026
+ "transformers": "4.31.0",
2027
+ "bitsandbytes": "0.41.1",
2028
+ "cuda": "11.7",
2029
+ "hostname": "rippa",
2030
+ "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
2031
+ "summarize_input_len_bytes": 857252,
2032
+ "summarize_output_len_bytes": 948,
2033
+ "summarize_time": 122.13213857014973,
2034
+ "generate_output_len_bytes": 2826,
2035
+ "generate_time": 66.34098903338115
2036
+ },
2037
+ {
2038
+ "backend": "transformers",
2039
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2040
+ "task": "summary_and_generate",
2041
+ "bits": 4,
2042
+ "ngpus": 2,
2043
+ "reps": 3,
2044
+ "date": "08/18/2023 22:33:33",
2045
+ "git_sha": "c1348fb3",
2046
+ "n_gpus": 2,
2047
+ "transformers": "4.31.0",
2048
+ "bitsandbytes": "0.41.1",
2049
+ "cuda": "11.7",
2050
+ "hostname": "rippa",
2051
+ "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)",
2052
+ "summarize_input_len_bytes": 857252,
2053
+ "summarize_output_len_bytes": 948,
2054
+ "summarize_time": 120.53812781969707,
2055
+ "generate_output_len_bytes": 2826,
2056
+ "generate_time": 67.28052496910095
2057
+ },
2058
+ {
2059
+ "backend": "text-generation-inference",
2060
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2061
+ "task": "summary_and_generate",
2062
+ "bits": 16,
2063
+ "ngpus": 1,
2064
+ "reps": 3,
2065
+ "date": "08/18/2023 22:56:52",
2066
+ "git_sha": "fb84de76",
2067
+ "n_gpus": 1,
2068
+ "transformers": "4.31.0",
2069
+ "bitsandbytes": "0.41.1",
2070
+ "cuda": "11.7",
2071
+ "hostname": "timemachine",
2072
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
2073
+ "summarize_input_len_bytes": 857252,
2074
+ "summarize_output_len_bytes": 1036,
2075
+ "summarize_time": 29.128981749216717,
2076
+ "generate_output_len_bytes": 2242,
2077
+ "generate_time": 12.197122732798258
2078
+ },
2079
+ {
2080
+ "backend": "text-generation-inference",
2081
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2082
+ "task": "summary_and_generate",
2083
+ "bits": 16,
2084
+ "ngpus": 1,
2085
+ "reps": 3,
2086
+ "date": "08/18/2023 23:00:33",
2087
+ "git_sha": "fb84de76",
2088
+ "n_gpus": 1,
2089
+ "transformers": "4.31.0",
2090
+ "bitsandbytes": "0.41.1",
2091
+ "cuda": "11.7",
2092
+ "hostname": "timemachine",
2093
+ "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)",
2094
+ "exception": "OOM"
2095
+ },
2096
+ {
2097
+ "backend": "transformers",
2098
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2099
+ "task": "summary_and_generate",
2100
+ "bits": 16,
2101
+ "ngpus": 1,
2102
+ "reps": 3,
2103
+ "date": "08/19/2023 05:47:43",
2104
+ "git_sha": "22352acd",
2105
+ "n_gpus": 1,
2106
+ "transformers": "4.31.0",
2107
+ "bitsandbytes": "0.41.1",
2108
+ "cuda": "11.7",
2109
+ "hostname": "recypabaszmhhmuae",
2110
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
2111
+ "exception": "OOM"
2112
+ },
2113
+ {
2114
+ "backend": "transformers",
2115
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2116
+ "task": "summary_and_generate",
2117
+ "bits": 8,
2118
+ "ngpus": 1,
2119
+ "reps": 3,
2120
+ "date": "08/19/2023 05:48:58",
2121
+ "git_sha": "22352acd",
2122
+ "n_gpus": 1,
2123
+ "transformers": "4.31.0",
2124
+ "bitsandbytes": "0.41.1",
2125
+ "cuda": "11.7",
2126
+ "hostname": "recypabaszmhhmuae",
2127
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
2128
+ "exception": "OOM"
2129
+ },
2130
+ {
2131
+ "backend": "transformers",
2132
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2133
+ "task": "summary_and_generate",
2134
+ "bits": 4,
2135
+ "ngpus": 1,
2136
+ "reps": 3,
2137
+ "date": "08/19/2023 05:50:40",
2138
+ "git_sha": "22352acd",
2139
+ "n_gpus": 1,
2140
+ "transformers": "4.31.0",
2141
+ "bitsandbytes": "0.41.1",
2142
+ "cuda": "11.7",
2143
+ "hostname": "recypabaszmhhmuae",
2144
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
2145
+ "summarize_input_len_bytes": 857252,
2146
+ "summarize_output_len_bytes": 948,
2147
+ "summarize_time": 165.05752809842429,
2148
+ "generate_output_len_bytes": 2605,
2149
+ "generate_time": 93.80659619967143
2150
+ },
2151
+ {
2152
+ "backend": "transformers",
2153
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2154
+ "task": "summary_and_generate",
2155
+ "bits": 16,
2156
+ "ngpus": 2,
2157
+ "reps": 3,
2158
+ "date": "08/19/2023 06:05:51",
2159
+ "git_sha": "22352acd",
2160
+ "n_gpus": 2,
2161
+ "transformers": "4.31.0",
2162
+ "bitsandbytes": "0.41.1",
2163
+ "cuda": "11.7",
2164
+ "hostname": "recypabaszmhhmuae",
2165
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
2166
+ "exception": "OOM"
2167
+ },
2168
+ {
2169
+ "backend": "transformers",
2170
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2171
+ "task": "summary_and_generate",
2172
+ "bits": 8,
2173
+ "ngpus": 2,
2174
+ "reps": 3,
2175
+ "date": "08/19/2023 06:10:05",
2176
+ "git_sha": "22352acd",
2177
+ "n_gpus": 2,
2178
+ "transformers": "4.31.0",
2179
+ "bitsandbytes": "0.41.1",
2180
+ "cuda": "11.7",
2181
+ "hostname": "recypabaszmhhmuae",
2182
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
2183
+ "summarize_input_len_bytes": 857252,
2184
+ "summarize_output_len_bytes": 906,
2185
+ "summarize_time": 410.0691332022349,
2186
+ "generate_output_len_bytes": 521,
2187
+ "generate_time": 57.71272214253744
2188
+ },
2189
+ {
2190
+ "backend": "transformers",
2191
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2192
+ "task": "summary_and_generate",
2193
+ "bits": 4,
2194
+ "ngpus": 2,
2195
+ "reps": 3,
2196
+ "date": "08/19/2023 06:36:58",
2197
+ "git_sha": "22352acd",
2198
+ "n_gpus": 2,
2199
+ "transformers": "4.31.0",
2200
+ "bitsandbytes": "0.41.1",
2201
+ "cuda": "11.7",
2202
+ "hostname": "recypabaszmhhmuae",
2203
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
2204
+ "summarize_input_len_bytes": 857252,
2205
+ "summarize_output_len_bytes": 948,
2206
+ "summarize_time": 171.74388321240744,
2207
+ "generate_output_len_bytes": 2605,
2208
+ "generate_time": 97.00725762049358
2209
+ },
2210
+ {
2211
+ "backend": "transformers",
2212
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2213
+ "task": "summary_and_generate",
2214
+ "bits": 16,
2215
+ "ngpus": 4,
2216
+ "reps": 3,
2217
+ "date": "08/19/2023 06:51:13",
2218
+ "git_sha": "22352acd",
2219
+ "n_gpus": 4,
2220
+ "transformers": "4.31.0",
2221
+ "bitsandbytes": "0.41.1",
2222
+ "cuda": "11.7",
2223
+ "hostname": "recypabaszmhhmuae",
2224
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2225
+ "summarize_input_len_bytes": 857252,
2226
+ "summarize_output_len_bytes": 792,
2227
+ "summarize_time": 267.0555826822917,
2228
+ "generate_output_len_bytes": 2783,
2229
+ "generate_time": 163.99818523724875
2230
+ },
2231
+ {
2232
+ "backend": "transformers",
2233
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2234
+ "task": "summary_and_generate",
2235
+ "bits": 8,
2236
+ "ngpus": 4,
2237
+ "reps": 3,
2238
+ "date": "08/19/2023 07:13:35",
2239
+ "git_sha": "22352acd",
2240
+ "n_gpus": 4,
2241
+ "transformers": "4.31.0",
2242
+ "bitsandbytes": "0.41.1",
2243
+ "cuda": "11.7",
2244
+ "hostname": "recypabaszmhhmuae",
2245
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2246
+ "summarize_input_len_bytes": 857252,
2247
+ "summarize_output_len_bytes": 906,
2248
+ "summarize_time": 413.9569679101308,
2249
+ "generate_output_len_bytes": 521,
2250
+ "generate_time": 58.52583885192871
2251
+ },
2252
+ {
2253
+ "backend": "transformers",
2254
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2255
+ "task": "summary_and_generate",
2256
+ "bits": 4,
2257
+ "ngpus": 4,
2258
+ "reps": 3,
2259
+ "date": "08/19/2023 07:38:02",
2260
+ "git_sha": "22352acd",
2261
+ "n_gpus": 4,
2262
+ "transformers": "4.31.0",
2263
+ "bitsandbytes": "0.41.1",
2264
+ "cuda": "11.7",
2265
+ "hostname": "recypabaszmhhmuae",
2266
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2267
+ "summarize_input_len_bytes": 857252,
2268
+ "summarize_output_len_bytes": 948,
2269
+ "summarize_time": 175.4907926718394,
2270
+ "generate_output_len_bytes": 2605,
2271
+ "generate_time": 98.97720170021057
2272
+ },
2273
+ {
2274
+ "backend": "text-generation-inference",
2275
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2276
+ "task": "summary_and_generate",
2277
+ "bits": 16,
2278
+ "ngpus": 2,
2279
+ "reps": 3,
2280
+ "date": "08/19/2023 12:35:08",
2281
+ "git_sha": "29a002e5",
2282
+ "n_gpus": 2,
2283
+ "transformers": "4.31.0",
2284
+ "bitsandbytes": "0.41.1",
2285
+ "cuda": "11.7",
2286
+ "hostname": "timemachine",
2287
+ "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)",
2288
+ "summarize_input_len_bytes": 857252,
2289
+ "summarize_output_len_bytes": 983,
2290
+ "summarize_time": 42.21107586224874,
2291
+ "generate_output_len_bytes": 2130,
2292
+ "generate_time": 16.94527777036031
2293
+ },
2294
+ {
2295
+ "backend": "text-generation-inference",
2296
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2297
+ "task": "summary_and_generate",
2298
+ "bits": 16,
2299
+ "ngpus": 2,
2300
+ "reps": 3,
2301
+ "date": "08/21/2023 20:03:36",
2302
+ "git_sha": "51318f44",
2303
+ "n_gpus": 2,
2304
+ "transformers": "4.31.0",
2305
+ "bitsandbytes": "0.41.1",
2306
+ "cuda": "11.7",
2307
+ "hostname": "recypabaszmhhmuae",
2308
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
2309
+ "summarize_input_len_bytes": 857252,
2310
+ "summarize_output_len_bytes": 1267,
2311
+ "summarize_time": 41.0461368560791,
2312
+ "generate_output_len_bytes": 2383,
2313
+ "generate_time": 19.614749511082966
2314
+ },
2315
+ {
2316
+ "backend": "text-generation-inference",
2317
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2318
+ "task": "summary_and_generate",
2319
+ "bits": 16,
2320
+ "ngpus": 4,
2321
+ "reps": 3,
2322
+ "date": "08/21/2023 20:07:35",
2323
+ "git_sha": "51318f44",
2324
+ "n_gpus": 4,
2325
+ "transformers": "4.31.0",
2326
+ "bitsandbytes": "0.41.1",
2327
+ "cuda": "11.7",
2328
+ "hostname": "recypabaszmhhmuae",
2329
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2330
+ "summarize_input_len_bytes": 857252,
2331
+ "summarize_output_len_bytes": 1267,
2332
+ "summarize_time": 42.8376894791921,
2333
+ "generate_output_len_bytes": 2383,
2334
+ "generate_time": 20.2719091574351
2335
+ },
2336
+ {
2337
+ "backend": "text-generation-inference",
2338
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2339
+ "task": "summary_and_generate",
2340
+ "bits": 16,
2341
+ "ngpus": 1,
2342
+ "reps": 3,
2343
+ "date": "08/21/2023 20:42:46",
2344
+ "git_sha": "2f4bb620",
2345
+ "n_gpus": 1,
2346
+ "transformers": "4.31.0",
2347
+ "bitsandbytes": "0.41.1",
2348
+ "cuda": "11.7",
2349
+ "hostname": "recypabaszmhhmuae",
2350
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
2351
+ "exception": "OOM"
2352
+ },
2353
+ {
2354
+ "backend": "text-generation-inference",
2355
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2356
+ "task": "summary_and_generate",
2357
+ "bits": 16,
2358
+ "ngpus": 4,
2359
+ "reps": 3,
2360
+ "date": "08/21/2023 20:50:19",
2361
+ "git_sha": "2f4bb620",
2362
+ "n_gpus": 4,
2363
+ "transformers": "4.31.0",
2364
+ "bitsandbytes": "0.41.1",
2365
+ "cuda": "11.7",
2366
+ "hostname": "recypabaszmhhmuae",
2367
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2368
+ "summarize_input_len_bytes": 857252,
2369
+ "summarize_output_len_bytes": 915,
2370
+ "summarize_time": 66.52468911806743,
2371
+ "generate_output_len_bytes": 2479,
2372
+ "generate_time": 29.828714847564697
2373
+ },
2374
+ {
2375
+ "backend": "text-generation-inference",
2376
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2377
+ "task": "summary_and_generate",
2378
+ "bits": 16,
2379
+ "ngpus": 4,
2380
+ "reps": 3,
2381
+ "date": "08/21/2023 20:56:04",
2382
+ "git_sha": "2f4bb620",
2383
+ "n_gpus": 4,
2384
+ "transformers": "4.31.0",
2385
+ "bitsandbytes": "0.41.1",
2386
+ "cuda": "11.7",
2387
+ "hostname": "recypabaszmhhmuae",
2388
+ "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)",
2389
+ "exception": "OOM"
2390
+ },
2391
+ {
2392
+ "backend": "text-generation-inference",
2393
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2394
+ "task": "summary_and_generate",
2395
+ "bits": 16,
2396
+ "ngpus": 1,
2397
+ "reps": 3,
2398
+ "date": "08/21/2023 19:55:35",
2399
+ "git_sha": "51318f44",
2400
+ "n_gpus": 1,
2401
+ "transformers": "4.31.0",
2402
+ "bitsandbytes": "0.41.1",
2403
+ "cuda": "11.8",
2404
+ "hostname": "cloudvm",
2405
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2406
+ "summarize_input_len_bytes": 857252,
2407
+ "summarize_output_len_bytes": 1267,
2408
+ "summarize_time": 38.753786404927574,
2409
+ "generate_output_len_bytes": 2383,
2410
+ "generate_time": 19.529522736867268
2411
+ },
2412
+ {
2413
+ "backend": "text-generation-inference",
2414
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2415
+ "task": "summary_and_generate",
2416
+ "bits": 16,
2417
+ "ngpus": 2,
2418
+ "reps": 3,
2419
+ "date": "08/21/2023 20:36:13",
2420
+ "git_sha": "51318f44",
2421
+ "n_gpus": 2,
2422
+ "transformers": "4.31.0",
2423
+ "bitsandbytes": "0.41.1",
2424
+ "cuda": "11.8",
2425
+ "hostname": "cloudvm",
2426
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2427
+ "summarize_input_len_bytes": 857252,
2428
+ "summarize_output_len_bytes": 1267,
2429
+ "summarize_time": 41.024452924728394,
2430
+ "generate_output_len_bytes": 2383,
2431
+ "generate_time": 20.29120985666911
2432
+ },
2433
+ {
2434
+ "backend": "text-generation-inference",
2435
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2436
+ "task": "summary_and_generate",
2437
+ "bits": 16,
2438
+ "ngpus": 2,
2439
+ "reps": 3,
2440
+ "date": "08/21/2023 20:40:08",
2441
+ "git_sha": "51318f44",
2442
+ "n_gpus": 2,
2443
+ "transformers": "4.31.0",
2444
+ "bitsandbytes": "0.41.1",
2445
+ "cuda": "11.8",
2446
+ "hostname": "cloudvm",
2447
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2448
+ "summarize_input_len_bytes": 857252,
2449
+ "summarize_output_len_bytes": 1046,
2450
+ "summarize_time": 54.554532527923584,
2451
+ "generate_output_len_bytes": 2171,
2452
+ "generate_time": 24.604793945948284
2453
+ },
2454
+ {
2455
+ "backend": "text-generation-inference",
2456
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2457
+ "task": "summary_and_generate",
2458
+ "bits": 16,
2459
+ "ngpus": 4,
2460
+ "reps": 3,
2461
+ "date": "08/21/2023 20:50:05",
2462
+ "git_sha": "51318f44",
2463
+ "n_gpus": 4,
2464
+ "transformers": "4.31.0",
2465
+ "bitsandbytes": "0.41.1",
2466
+ "cuda": "11.8",
2467
+ "hostname": "cloudvm",
2468
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2469
+ "summarize_input_len_bytes": 857252,
2470
+ "summarize_output_len_bytes": 1267,
2471
+ "summarize_time": 41.09950613975525,
2472
+ "generate_output_len_bytes": 2383,
2473
+ "generate_time": 20.947362899780273
2474
+ },
2475
+ {
2476
+ "backend": "text-generation-inference",
2477
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2478
+ "task": "summary_and_generate",
2479
+ "bits": 16,
2480
+ "ngpus": 4,
2481
+ "reps": 3,
2482
+ "date": "08/21/2023 20:54:08",
2483
+ "git_sha": "51318f44",
2484
+ "n_gpus": 4,
2485
+ "transformers": "4.31.0",
2486
+ "bitsandbytes": "0.41.1",
2487
+ "cuda": "11.8",
2488
+ "hostname": "cloudvm",
2489
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2490
+ "summarize_input_len_bytes": 857252,
2491
+ "summarize_output_len_bytes": 1046,
2492
+ "summarize_time": 58.3172922929128,
2493
+ "generate_output_len_bytes": 2171,
2494
+ "generate_time": 25.735217014948528
2495
+ },
2496
+ {
2497
+ "backend": "text-generation-inference",
2498
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2499
+ "task": "summary_and_generate",
2500
+ "bits": 16,
2501
+ "ngpus": 8,
2502
+ "reps": 3,
2503
+ "date": "08/21/2023 21:01:04",
2504
+ "git_sha": "51318f44",
2505
+ "n_gpus": 8,
2506
+ "transformers": "4.31.0",
2507
+ "bitsandbytes": "0.41.1",
2508
+ "cuda": "11.8",
2509
+ "hostname": "cloudvm",
2510
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2511
+ "summarize_input_len_bytes": 857252,
2512
+ "summarize_output_len_bytes": 1267,
2513
+ "summarize_time": 42.85940829912821,
2514
+ "generate_output_len_bytes": 2383,
2515
+ "generate_time": 21.380353291829426
2516
+ },
2517
+ {
2518
+ "backend": "text-generation-inference",
2519
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2520
+ "task": "summary_and_generate",
2521
+ "bits": 16,
2522
+ "ngpus": 8,
2523
+ "reps": 3,
2524
+ "date": "08/21/2023 21:05:24",
2525
+ "git_sha": "51318f44",
2526
+ "n_gpus": 8,
2527
+ "transformers": "4.31.0",
2528
+ "bitsandbytes": "0.41.1",
2529
+ "cuda": "11.8",
2530
+ "hostname": "cloudvm",
2531
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2532
+ "summarize_input_len_bytes": 857252,
2533
+ "summarize_output_len_bytes": 1046,
2534
+ "summarize_time": 54.235164642333984,
2535
+ "generate_output_len_bytes": 2171,
2536
+ "generate_time": 25.70338026682536
2537
+ },
2538
+ {
2539
+ "backend": "text-generation-inference",
2540
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2541
+ "task": "summary_and_generate",
2542
+ "bits": 16,
2543
+ "ngpus": 8,
2544
+ "reps": 3,
2545
+ "date": "08/21/2023 21:10:37",
2546
+ "git_sha": "51318f44",
2547
+ "n_gpus": 8,
2548
+ "transformers": "4.31.0",
2549
+ "bitsandbytes": "0.41.1",
2550
+ "cuda": "11.8",
2551
+ "hostname": "cloudvm",
2552
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2553
+ "summarize_input_len_bytes": 857252,
2554
+ "summarize_output_len_bytes": 927,
2555
+ "summarize_time": 133.53030570348105,
2556
+ "generate_output_len_bytes": 2782,
2557
+ "generate_time": 72.97924383481343
2558
+ },
2559
+ {
2560
+ "backend": "text-generation-inference",
2561
+ "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat",
2562
+ "task": "summary_and_generate",
2563
+ "bits": 16,
2564
+ "ngpus": 4,
2565
+ "reps": 3,
2566
+ "date": "08/21/2023 22:18:17",
2567
+ "git_sha": "51318f44",
2568
+ "n_gpus": 4,
2569
+ "transformers": "4.31.0",
2570
+ "bitsandbytes": "0.41.1",
2571
+ "cuda": "11.8",
2572
+ "hostname": "cloudvm",
2573
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2574
+ "summarize_input_len_bytes": 857252,
2575
+ "summarize_output_len_bytes": 927,
2576
+ "summarize_time": 131.45291074117026,
2577
+ "generate_output_len_bytes": 2782,
2578
+ "generate_time": 72.30849742889404
2579
+ },
2580
+ {
2581
+ "backend": "text-generation-inference",
2582
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2583
+ "task": "summary_and_generate",
2584
+ "bits": 16,
2585
+ "ngpus": 1,
2586
+ "reps": 3,
2587
+ "date": "08/21/2023 22:51:09",
2588
+ "git_sha": "383b6bbc",
2589
+ "n_gpus": 1,
2590
+ "transformers": "4.31.0",
2591
+ "bitsandbytes": "0.41.1",
2592
+ "cuda": "11.8",
2593
+ "hostname": "cloudvm",
2594
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2595
+ "summarize_input_len_bytes": 857252,
2596
+ "summarize_output_len_bytes": 1267,
2597
+ "summarize_time": 39.269713958104454,
2598
+ "generate_output_len_bytes": 2383,
2599
+ "generate_time": 19.65731406211853
2600
+ },
2601
+ {
2602
+ "backend": "text-generation-inference",
2603
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2604
+ "task": "summary_and_generate",
2605
+ "bits": 16,
2606
+ "ngpus": 1,
2607
+ "reps": 3,
2608
+ "date": "08/21/2023 22:54:54",
2609
+ "git_sha": "383b6bbc",
2610
+ "n_gpus": 1,
2611
+ "transformers": "4.31.0",
2612
+ "bitsandbytes": "0.41.1",
2613
+ "cuda": "11.8",
2614
+ "hostname": "cloudvm",
2615
+ "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2616
+ "summarize_input_len_bytes": 857252,
2617
+ "summarize_output_len_bytes": 1046,
2618
+ "summarize_time": 51.84283971786499,
2619
+ "generate_output_len_bytes": 2171,
2620
+ "generate_time": 28.441521485646565
2621
+ },
2622
+ {
2623
+ "backend": "text-generation-inference",
2624
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2625
+ "task": "summary_and_generate",
2626
+ "bits": 16,
2627
+ "ngpus": 2,
2628
+ "reps": 3,
2629
+ "date": "08/21/2023 23:13:10",
2630
+ "git_sha": "383b6bbc",
2631
+ "n_gpus": 2,
2632
+ "transformers": "4.31.0",
2633
+ "bitsandbytes": "0.41.1",
2634
+ "cuda": "11.8",
2635
+ "hostname": "cloudvm",
2636
+ "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2637
+ "summarize_input_len_bytes": 857252,
2638
+ "summarize_output_len_bytes": 1046,
2639
+ "summarize_time": 53.383726040522255,
2640
+ "generate_output_len_bytes": 2171,
2641
+ "generate_time": 24.422890504201252
2642
+ },
2643
+ {
2644
+ "backend": "text-generation-inference",
2645
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2646
+ "task": "summary_and_generate",
2647
+ "bits": 16,
2648
+ "ngpus": 4,
2649
+ "reps": 3,
2650
+ "date": "08/21/2023 23:18:04",
2651
+ "git_sha": "383b6bbc",
2652
+ "n_gpus": 4,
2653
+ "transformers": "4.31.0",
2654
+ "bitsandbytes": "0.41.1",
2655
+ "cuda": "11.8",
2656
+ "hostname": "cloudvm",
2657
+ "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2658
+ "summarize_input_len_bytes": 857252,
2659
+ "summarize_output_len_bytes": 1046,
2660
+ "summarize_time": 52.791220347086586,
2661
+ "generate_output_len_bytes": 2171,
2662
+ "generate_time": 25.378511508305866
2663
+ },
2664
+ {
2665
+ "backend": "text-generation-inference",
2666
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2667
+ "task": "summary_and_generate",
2668
+ "bits": 16,
2669
+ "ngpus": 8,
2670
+ "reps": 3,
2671
+ "date": "08/21/2023 23:23:11",
2672
+ "git_sha": "383b6bbc",
2673
+ "n_gpus": 8,
2674
+ "transformers": "4.31.0",
2675
+ "bitsandbytes": "0.41.1",
2676
+ "cuda": "11.8",
2677
+ "hostname": "cloudvm",
2678
+ "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)",
2679
+ "summarize_input_len_bytes": 857252,
2680
+ "summarize_output_len_bytes": 1046,
2681
+ "summarize_time": 56.3846542040507,
2682
+ "generate_output_len_bytes": 2171,
2683
+ "generate_time": 26.636192480723064
2684
+ },
2685
+ {
2686
+ "backend": "text-generation-inference",
2687
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2688
+ "task": "summary_and_generate",
2689
+ "bits": 16,
2690
+ "ngpus": 1,
2691
+ "reps": 3,
2692
+ "date": "08/21/2023 23:52:44",
2693
+ "git_sha": "da69b822",
2694
+ "n_gpus": 1,
2695
+ "transformers": "4.31.0",
2696
+ "bitsandbytes": "0.41.1",
2697
+ "cuda": "11.7",
2698
+ "hostname": "recypabaszmhhmuae",
2699
+ "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)",
2700
+ "summarize_input_len_bytes": 857252,
2701
+ "summarize_output_len_bytes": 1267,
2702
+ "summarize_time": 40.36223220825195,
2703
+ "generate_output_len_bytes": 2383,
2704
+ "generate_time": 19.87660264968872
2705
+ },
2706
+ {
2707
+ "backend": "text-generation-inference",
2708
+ "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat",
2709
+ "task": "summary_and_generate",
2710
+ "bits": 16,
2711
+ "ngpus": 2,
2712
+ "reps": 3,
2713
+ "date": "08/22/2023 00:15:05",
2714
+ "git_sha": "e843e8c3",
2715
+ "n_gpus": 2,
2716
+ "transformers": "4.31.0",
2717
+ "bitsandbytes": "0.41.1",
2718
+ "cuda": "11.7",
2719
+ "hostname": "recypabaszmhhmuae",
2720
+ "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)",
2721
+ "summarize_input_len_bytes": 857252,
2722
+ "summarize_output_len_bytes": 915,
2723
+ "summarize_time": 64.78201874097188,
2724
+ "generate_output_len_bytes": 2479,
2725
+ "generate_time": 29.02147897084554
2726
+ },
2727
+ {
2728
+ "backend": "transformers",
2729
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2730
+ "task": "summary_and_generate",
2731
+ "bits": 16,
2732
+ "ngpus": 0,
2733
+ "reps": 3,
2734
+ "date": "08/22/2023 19:01:15",
2735
+ "git_sha": "855b7d15",
2736
+ "n_gpus": 0,
2737
+ "transformers": "4.31.0",
2738
+ "bitsandbytes": "0.41.1",
2739
+ "cuda": "11.7",
2740
+ "hostname": "rippa",
2741
+ "gpus": "CPU",
2742
+ "summarize_input_len_bytes": 857252,
2743
+ "summarize_output_len_bytes": 1351,
2744
+ "summarize_time": 1215.5185990333557,
2745
+ "generate_output_len_bytes": 849,
2746
+ "generate_time": 180.56836318969727
2747
+ },
2748
+ {
2749
+ "backend": "transformers",
2750
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2751
+ "task": "summary_and_generate",
2752
+ "bits": 8,
2753
+ "ngpus": 0,
2754
+ "reps": 3,
2755
+ "date": "08/22/2023 20:11:16",
2756
+ "git_sha": "855b7d15",
2757
+ "n_gpus": 0,
2758
+ "transformers": "4.31.0",
2759
+ "bitsandbytes": "0.41.1",
2760
+ "cuda": "11.7",
2761
+ "hostname": "rippa",
2762
+ "gpus": "CPU",
2763
+ "summarize_input_len_bytes": 857252,
2764
+ "summarize_output_len_bytes": 1353,
2765
+ "summarize_time": 1216.9783231417339,
2766
+ "generate_output_len_bytes": 849,
2767
+ "generate_time": 180.42225472132364
2768
+ },
2769
+ {
2770
+ "backend": "transformers",
2771
+ "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat",
2772
+ "task": "summary_and_generate",
2773
+ "bits": 4,
2774
+ "ngpus": 0,
2775
+ "reps": 3,
2776
+ "date": "08/22/2023 21:21:20",
2777
+ "git_sha": "855b7d15",
2778
+ "n_gpus": 0,
2779
+ "transformers": "4.31.0",
2780
+ "bitsandbytes": "0.41.1",
2781
+ "cuda": "11.7",
2782
+ "hostname": "rippa",
2783
+ "gpus": "CPU",
2784
+ "summarize_input_len_bytes": 857252,
2785
+ "summarize_output_len_bytes": 1354,
2786
+ "summarize_time": 1217.1687794526417,
2787
+ "generate_output_len_bytes": 843,
2788
+ "generate_time": 180.78463260332742
2789
+ }
2790
+ ]
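
Every record above shares a flat schema: `backend`, `base_model`, `bits`, `n_gpus`, the measured `summarize_time` and `generate_time` in seconds, and an `"exception": "OOM"` field in place of timings when a run failed. A minimal sketch of loading and aggregating these records with only the standard library (the relative file path is an assumption about where the script is run from, not part of this commit):

```python
# Minimal sketch: group mean summarize_time by configuration,
# skipping failed (OOM) runs, which carry an "exception" field.
import json
from collections import defaultdict

# Assumption: executed from the repo root where this file exists.
with open("benchmarks/llm_gpu_benchmarks.json") as f:
    records = json.load(f)

groups = defaultdict(list)
for r in records:
    if "exception" in r:
        continue  # OOM runs have no timing fields
    key = (r["backend"], r["base_model"], r["bits"], r["n_gpus"])
    groups[key].append(r["summarize_time"])

for key, times in sorted(groups.items()):
    print(*key, f"mean summarize_time: {sum(times) / len(times):.1f}s")
```

The same loop works for `generate_time`, and for `benchmarks/perf.json` below the only change is the reader: that file is JSONL, so parse it with `json.loads` per line rather than a single `json.load`.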
benchmarks/perf.json ADDED
@@ -0,0 +1,136 @@
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:46:19", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.29472152392069, "generate_output_len_bytes": 2384, "generate_time": 14.563165505727133}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:55", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 67.97515447934468, "generate_output_len_bytes": 2384, "generate_time": 33.00641902287801}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:48:58", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1440, "summarize_time": 114.62220064798991, "generate_output_len_bytes": 2619, "generate_time": 71.0722058614095}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:58:34", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 866, "summarize_time": 39.54404203097025, "generate_output_len_bytes": 2927, "generate_time": 22.466302394866943}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:01:59", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 32.1394579410553, "generate_output_len_bytes": 2384, "generate_time": 14.757195552190145}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 10:54:29", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 185.14580019315085, "generate_output_len_bytes": 2042, "generate_time": 117.13909141222636}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:04:37", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.98129558563232, "generate_output_len_bytes": 2512, "generate_time": 69.4871145884196}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:13:08", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 43.23498781522115, "generate_output_len_bytes": 2927, "generate_time": 22.826789538065594}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:10:08", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 90.51939169565837, "generate_output_len_bytes": 2927, "generate_time": 48.96095744768778}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:16:48", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 31.86189842224121, "generate_output_len_bytes": 2384, "generate_time": 14.209659894307455}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:17:39", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 71.48081835110982, "generate_output_len_bytes": 2384, "generate_time": 33.5740262667338}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:19:24", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 94.17744310696919, "generate_output_len_bytes": 2512, "generate_time": 70.12592967351277}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 11:27:57", "git_sha": "55d3b55b", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1276, "summarize_time": 42.8066500822703, "generate_output_len_bytes": 2927, "generate_time": 22.626200040181477}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:23:22", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 910, "summarize_time": 186.88371555010477, "generate_output_len_bytes": 2042, "generate_time": 117.3530724843343}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 11:39:03", "git_sha": "55d3b55b", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 94.50985678037007, "generate_output_len_bytes": 2927, "generate_time": 50.06416177749634}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:08:31", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.80374129613241, "generate_output_len_bytes": 2384, "generate_time": 19.23690136273702}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:11:49", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.79640992482504, "generate_output_len_bytes": 2772, "generate_time": 93.99476226170857}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 21:25:53", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.44271365801493, "generate_output_len_bytes": 2927, "generate_time": 30.641155401865642}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:30:30", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.80062770843506, "generate_output_len_bytes": 2384, "generate_time": 19.825008392333984}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:35:29", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 177.35046529769897, "generate_output_len_bytes": 2772, "generate_time": 91.73111907641093}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 21:49:20", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 56.894784371058144, "generate_output_len_bytes": 2927, "generate_time": 32.15500020980835}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:54:11", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.46419604619344, "generate_output_len_bytes": 2384, "generate_time": 20.049855709075928}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/18/2023 21:57:39", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 183.73364853858948, "generate_output_len_bytes": 2772, "generate_time": 94.9052836894989}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/18/2023 22:11:59", "git_sha": "fc4826f2", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 59.204413731892906, "generate_output_len_bytes": 2927, "generate_time": 33.25332593917847}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:17:00", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.09002653757731, "generate_output_len_bytes": 2384, "generate_time": 20.106103817621868}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:20:31", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 185.28164370854697, "generate_output_len_bytes": 2772, "generate_time": 95.13023789723714}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/18/2023 22:34:58", "git_sha": "fc4826f2", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.9919019540151, "generate_output_len_bytes": 2927, "generate_time": 34.328625202178955}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:34", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.49842747052511, "generate_output_len_bytes": 2172, "generate_time": 20.686774571736652}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:31:55", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:38", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 168.9666860898336, "generate_output_len_bytes": 2249, "generate_time": 73.25518870353699}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:48:09", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 45.30513469378153, "generate_output_len_bytes": 1802, "generate_time": 22.000216643015545}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:51:56", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.64275654157003, "generate_output_len_bytes": 2172, "generate_time": 20.737667481104534}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:35:47", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.4669913450877, "generate_output_len_bytes": 2132, "generate_time": 141.7793349424998}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 13:57:35", "git_sha": "fc4826f2", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 96.61887431144714, "generate_output_len_bytes": 3244, "generate_time": 82.98751719792683}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 13:55:51", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 167.52292919158936, "generate_output_len_bytes": 2249, "generate_time": 71.82611886660258}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:08:08", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 856, "summarize_time": 47.14254776636759, "generate_output_len_bytes": 1802, "generate_time": 22.54850967725118}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:15", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:07:15", "git_sha": "fc4826f2", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 89.59958203633626, "generate_output_len_bytes": 2172, "generate_time": 42.32424934705099}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:15:30", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 185.44230167071024, "generate_output_len_bytes": 2122, "generate_time": 88.11553311347961}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 14:29:36", "git_sha": "d13230ee", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 922, "summarize_time": 68.06459252039592, "generate_output_len_bytes": 1802, "generate_time": 27.939613421758015}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:26:29", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 980, "summarize_time": 280.8310640652974, "generate_output_len_bytes": 2132, "generate_time": 143.21916349728903}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 14:48:17", "git_sha": "d13230ee", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 869, "summarize_time": 98.47045453389485, "generate_output_len_bytes": 3244, "generate_time": 83.71360301971436}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:35:13", "git_sha": "0dec0f52", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 15:49:33", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:26:53", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:27:32", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 16:29:03", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 17:26:02", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 18:59:16", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1075, "summarize_time": 39.01545596122742, "generate_output_len_bytes": 2242, "generate_time": 10.151424566904703}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:03:13", "git_sha": "5691db4a", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 940, "summarize_time": 21.78233750661214, "generate_output_len_bytes": 2130, "generate_time": 15.794983307520548}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:38:40", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1114, "summarize_time": 7.636120955149333, "generate_output_len_bytes": 2275, "generate_time": 7.922623078028361}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/18/2023 19:41:02", "git_sha": "6f05e8f1", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1024, "summarize_time": 10.824170271555582, "generate_output_len_bytes": 2130, "generate_time": 9.209020694096884}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 19:55:17", "git_sha": "2c548f21", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA GeForce RTX 4090 (24564 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1088, "summarize_time": 24.39883820215861, "generate_output_len_bytes": 2275, "generate_time": 12.755743900934855}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:57:21", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 37.113919814427696, "generate_output_len_bytes": 2384, "generate_time": 18.36507821083069}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:00:31", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 49.79721482594808, "generate_output_len_bytes": 2172, "generate_time": 21.780913591384888}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:04:36", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:05:26", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 181.2461258570353, "generate_output_len_bytes": 2772, "generate_time": 92.64811905225118}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:33", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 174.4576851526896, "generate_output_len_bytes": 2713, "generate_time": 119.14412077267964}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:36:14", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 53.39731526374817, "generate_output_len_bytes": 2927, "generate_time": 31.369641542434692}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:40:53", "git_sha": "a227be4f", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 74.27096923192342, "generate_output_len_bytes": 1802, "generate_time": 29.860486666361492}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:48:09", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.926851193110146, "generate_output_len_bytes": 2384, "generate_time": 18.481745958328247}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:51:27", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.299002488454185, "generate_output_len_bytes": 2172, "generate_time": 21.828503131866455}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:56:20", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 178.19972308476767, "generate_output_len_bytes": 2772, "generate_time": 91.73426882425944}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:10:13", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 180.7814578215281, "generate_output_len_bytes": 2713, "generate_time": 124.72717420260112}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:26:43", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 57.08081785837809, "generate_output_len_bytes": 2927, "generate_time": 32.26534946759542}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:31:36", "git_sha": "a227be4f", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 79.9461121559143, "generate_output_len_bytes": 1802, "generate_time": 31.403561115264893}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:38:23", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.33977222442627, "generate_output_len_bytes": 2384, "generate_time": 19.723278522491455}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:41:52", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.377869288126625, "generate_output_len_bytes": 2172, "generate_time": 25.01458676656087}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 02:47:05", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 180.53432401021323, "generate_output_len_bytes": 2772, "generate_time": 91.93375285466512}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:01:07", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 179.50477250417075, "generate_output_len_bytes": 2713, "generate_time": 124.40728378295898}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:17:36", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 58.62867816289266, "generate_output_len_bytes": 2927, "generate_time": 33.394495725631714}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:22:37", "git_sha": "a227be4f", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 78.90612125396729, "generate_output_len_bytes": 1802, "generate_time": 30.697617371877033}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:29:20", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.498607873916626, "generate_output_len_bytes": 2384, "generate_time": 19.509677171707153}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:32:44", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 55.3964786529541, "generate_output_len_bytes": 2172, "generate_time": 24.347585439682007}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:37:55", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1179, "summarize_time": 186.71331850687662, "generate_output_len_bytes": 2772, "generate_time": 95.784650405248}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 8, "reps": 3, "date": "08/19/2023 03:52:28", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 800, "summarize_time": 185.3280005455017, "generate_output_len_bytes": 2713, "generate_time": 125.91738017400105}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:09:18", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1002, "summarize_time": 60.18280680974325, "generate_output_len_bytes": 2927, "generate_time": 33.386961142222084}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 8, "reps": 3, "date": "08/19/2023 04:14:25", "git_sha": "a227be4f", "n_gpus": 8, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 83.04790727297465, "generate_output_len_bytes": 1802, "generate_time": 32.24992283185323}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:26:19", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.03754989306132, "generate_output_len_bytes": 2384, "generate_time": 19.964784463246662}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:33:09", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 71.91136892636617, "generate_output_len_bytes": 2480, "generate_time": 33.6295014222463}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:44:08", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:45:42", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 148.61560583114624, "generate_output_len_bytes": 2357, "generate_time": 89.01266026496887}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 00:58:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 193.99270629882812, "generate_output_len_bytes": 2129, "generate_time": 95.66660761833191}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:01", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:13:55", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 61.52411222457886, "generate_output_len_bytes": 2927, "generate_time": 32.030215660730995}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 01:19:00", "git_sha": "0cdb75ef", "n_gpus": 1, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 81.13888708750407, "generate_output_len_bytes": 3486, "generate_time": 55.5331826210022}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:27:49", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 47.41046245892843, "generate_output_len_bytes": 2384, "generate_time": 20.660600344340008}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 01:34:28", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 72.85646979014079, "generate_output_len_bytes": 2480, "generate_time": 34.05861854553223}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:39:22", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 152.54357608159384, "generate_output_len_bytes": 2357, "generate_time": 91.51808977127075}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 02:52:58", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 195.92926557858786, "generate_output_len_bytes": 2129, "generate_time": 96.55542047818501}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:15:01", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 64.64422671000163, "generate_output_len_bytes": 2927, "generate_time": 33.30378039677938}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 03:20:19", "git_sha": "0cdb75ef", "n_gpus": 2, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 84.57761120796204, "generate_output_len_bytes": 3486, "generate_time": 57.59072462717692}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:28:44", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1417, "summarize_time": 49.08898218472799, "generate_output_len_bytes": 2384, "generate_time": 21.489527861277264}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:32:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 74.43774898846944, "generate_output_len_bytes": 2480, "generate_time": 34.72673638661703}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:39:21", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1007, "summarize_time": 153.41076453526816, "generate_output_len_bytes": 2357, "generate_time": 91.14894040425618}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 03:52:00", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 763, "summarize_time": 199.79869039853415, "generate_output_len_bytes": 2129, "generate_time": 98.61504419644673}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:08:12", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 991, "summarize_time": 66.49260465304057, "generate_output_len_bytes": 2927, "generate_time": 34.17951035499573}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 04:13:39", "git_sha": "0cdb75ef", "n_gpus": 4, "transformers": "4.30.2", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1000, "summarize_time": 87.65787092844646, "generate_output_len_bytes": 3486, "generate_time": 59.3750696182251}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:22:24", "git_sha": "b63768c6", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "1 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 122.13213857014973, "generate_output_len_bytes": 2826, "generate_time": 66.34098903338115}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/18/2023 22:33:33", "git_sha": "c1348fb3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "2 x NVIDIA RTX 6000 Ada Generation (49140 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 120.53812781969707, "generate_output_len_bytes": 2826, "generate_time": 67.28052496910095}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 22:56:52", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1036, "summarize_time": 29.128981749216717, "generate_output_len_bytes": 2242, "generate_time": 12.197122732798258}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/18/2023 23:00:33", "git_sha": "fb84de76", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "1 x NVIDIA GeForce RTX 3090 (24576 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:47:43", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:48:58", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 1, "reps": 3, "date": "08/19/2023 05:50:40", "git_sha": "22352acd", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 165.05752809842429, "generate_output_len_bytes": 2605, "generate_time": 93.80659619967143}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:05:51", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:10:05", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 410.0691332022349, "generate_output_len_bytes": 521, "generate_time": 57.71272214253744}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 2, "reps": 3, "date": "08/19/2023 06:36:58", "git_sha": "22352acd", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 171.74388321240744, "generate_output_len_bytes": 2605, "generate_time": 97.00725762049358}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/19/2023 06:51:13", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 792, "summarize_time": 267.0555826822917, "generate_output_len_bytes": 2783, "generate_time": 163.99818523724875}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:13:35", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 906, "summarize_time": 413.9569679101308, "generate_output_len_bytes": 521, "generate_time": 58.52583885192871}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 4, "reps": 3, "date": "08/19/2023 07:38:02", "git_sha": "22352acd", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 948, "summarize_time": 175.4907926718394, "generate_output_len_bytes": 2605, "generate_time": 98.97720170021057}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/19/2023 12:35:08", "git_sha": "29a002e5", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "timemachine", "gpus": "2 x NVIDIA GeForce RTX 3090 (24576 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 983, "summarize_time": 42.21107586224874, "generate_output_len_bytes": 2130, "generate_time": 16.94527777036031}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:03:36", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.0461368560791, "generate_output_len_bytes": 2383, "generate_time": 19.614749511082966}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:07:35", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.8376894791921, "generate_output_len_bytes": 2383, "generate_time": 20.2719091574351}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 20:42:46", "git_sha": "2f4bb620", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:19", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 66.52468911806743, "generate_output_len_bytes": 2479, "generate_time": 29.828714847564697}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:56:04", "git_sha": "2f4bb620", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "4 x NVIDIA RTX A6000 (46068 MiB)", "exception": "OOM"}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 19:55:35", "git_sha": "51318f44", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 38.753786404927574, "generate_output_len_bytes": 2383, "generate_time": 19.529522736867268}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:36:13", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.024452924728394, "generate_output_len_bytes": 2383, "generate_time": 20.29120985666911}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 20:40:08", "git_sha": "51318f44", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.554532527923584, "generate_output_len_bytes": 2171, "generate_time": 24.604793945948284}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:50:05", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 41.09950613975525, "generate_output_len_bytes": 2383, "generate_time": 20.947362899780273}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 20:54:08", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 58.3172922929128, "generate_output_len_bytes": 2171, "generate_time": 25.735217014948528}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:01:04", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 42.85940829912821, "generate_output_len_bytes": 2383, "generate_time": 21.380353291829426}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:05:24", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 54.235164642333984, "generate_output_len_bytes": 2171, "generate_time": 25.70338026682536}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 21:10:37", "git_sha": "51318f44", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 133.53030570348105, "generate_output_len_bytes": 2782, "generate_time": 72.97924383481343}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-70b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 22:18:17", "git_sha": "51318f44", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 927, "summarize_time": 131.45291074117026, "generate_output_len_bytes": 2782, "generate_time": 72.30849742889404}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:51:09", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 39.269713958104454, "generate_output_len_bytes": 2383, "generate_time": 19.65731406211853}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 22:54:54", "git_sha": "383b6bbc", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "1 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 51.84283971786499, "generate_output_len_bytes": 2171, "generate_time": 28.441521485646565}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/21/2023 23:13:10", "git_sha": "383b6bbc", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "2 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 53.383726040522255, "generate_output_len_bytes": 2171, "generate_time": 24.422890504201252}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 4, "reps": 3, "date": "08/21/2023 23:18:04", "git_sha": "383b6bbc", "n_gpus": 4, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "4 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 52.791220347086586, "generate_output_len_bytes": 2171, "generate_time": 25.378511508305866}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 8, "reps": 3, "date": "08/21/2023 23:23:11", "git_sha": "383b6bbc", "n_gpus": 8, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.8", "hostname": "cloudvm", "gpus": "8 x NVIDIA A100-SXM4-80GB (81920 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1046, "summarize_time": 56.3846542040507, "generate_output_len_bytes": 2171, "generate_time": 26.636192480723064}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 1, "reps": 3, "date": "08/21/2023 23:52:44", "git_sha": "da69b822", "n_gpus": 1, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "1 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1267, "summarize_time": 40.36223220825195, "generate_output_len_bytes": 2383, "generate_time": 19.87660264968872}
+ {"backend": "text-generation-inference", "base_model": "h2oai/h2ogpt-4096-llama2-13b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 2, "reps": 3, "date": "08/22/2023 00:15:05", "git_sha": "e843e8c3", "n_gpus": 2, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "recypabaszmhhmuae", "gpus": "2 x NVIDIA RTX A6000 (46068 MiB)", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 915, "summarize_time": 64.78201874097188, "generate_output_len_bytes": 2479, "generate_time": 29.02147897084554}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 16, "ngpus": 0, "reps": 3, "date": "08/22/2023 19:01:15", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1351, "summarize_time": 1215.5185990333557, "generate_output_len_bytes": 849, "generate_time": 180.56836318969727}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 8, "ngpus": 0, "reps": 3, "date": "08/22/2023 20:11:16", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1353, "summarize_time": 1216.9783231417339, "generate_output_len_bytes": 849, "generate_time": 180.42225472132364}
+ {"backend": "transformers", "base_model": "h2oai/h2ogpt-4096-llama2-7b-chat", "task": "summary_and_generate", "bits": 4, "ngpus": 0, "reps": 3, "date": "08/22/2023 21:21:20", "git_sha": "855b7d15", "n_gpus": 0, "transformers": "4.31.0", "bitsandbytes": "0.41.1", "cuda": "11.7", "hostname": "rippa", "gpus": "CPU", "summarize_input_len_bytes": 857252, "summarize_output_len_bytes": 1354, "summarize_time": 1217.1687794526417, "generate_output_len_bytes": 843, "generate_time": 180.78463260332742}
benchmarks/perf.md ADDED
@@ -0,0 +1,200 @@
+ # Backend: transformers
+
+ For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/blob/main/benchmarks/llm_gpu_benchmark_transformers.html), save the linked file as HTML on your machine and open it in a browser.
+
+
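+ The "summarization time" column below is `summarize_time` taken directly from `perf.json`; the "generation speed" column appears to be derived as `generate_output_len_bytes / 4 / generate_time`, i.e. a rough 4-bytes-per-token approximation (e.g. 2384 bytes / 4 / 18.37 s ≈ 32.45 tokens/sec for the 7b A100 run). A minimal sketch that recomputes both columns from the raw records, assuming one JSON object per line in `perf.json`:
+
+ ```python
+ import json
+
+ # Load the raw benchmark records; each line of perf.json is one JSON object.
+ with open("benchmarks/perf.json") as f:
+     records = [json.loads(line) for line in f if line.strip()]
+
+ for r in records:
+     if "exception" in r:  # failed runs (e.g. OOM) carry no timing fields
+         print(f'{r["base_model"]} | {r["bits"]}-bit | {r["gpus"]}: {r["exception"]}')
+         continue
+     tok_per_s = r["generate_output_len_bytes"] / 4 / r["generate_time"]  # ~4 bytes/token
+     print(f'{r["base_model"]} | {r["bits"]}-bit | {r["gpus"]}: '
+           f'summarize {r["summarize_time"]:.2f} s, generate {tok_per_s:.2f} tokens/s')
+ ```
+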
+ ## Model: h2oai/h2ogpt-4096-llama2-7b-chat (transformers)
+ ### Number of GPUs: 0
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+ |-------:|:-------|---------------------------:|--------------------------------:|:------------|
+ | 16 | CPU | 1215.52 | 1.17546 | |
+ | 8 | CPU | 1216.98 | 1.17641 | |
+ | 4 | CPU | 1217.17 | 1.16575 | |
+ ### Number of GPUs: 1
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+ | 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 31.8619 | 41.9433 | |
+ | 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 32.2947 | 40.9252 | |
+ | 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 37.1139 | 32.4529 | |
+ | 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 47.0375 | 29.8526 | |
+ | 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 67.9752 | 18.0571 | |
+ | 8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 114.622 | 9.21246 | |
+ | 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 94.1774 | 8.95532 | |
+ | 8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 181.246 | 7.47991 | |
+ | 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | 148.616 | 6.61984 | |
+ | 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 185.146 | 4.35807 | |
+ | 4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 39.544 | 32.571 | |
+ | 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 42.8067 | 32.3408 | |
+ | 4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 53.3973 | 23.3267 | |
+ | 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 61.5241 | 22.8456 | |
+ | 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 90.5194 | 14.9456 | |
+ ### Number of GPUs: 2
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
+ | 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 32.1395 | 40.3871 | |
+ | 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 39.9269 | 32.248 | |
36
+ | 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 47.4105 | 28.8472 | |
37
+ | 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 71.4808 | 17.7518 | |
38
+ | 8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 94.9813 | 9.03765 | |
39
+ | 8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 178.2 | 7.55443 | |
40
+ | 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 152.544 | 6.43862 | |
41
+ | 8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 186.884 | 4.35012 | |
42
+ | 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 43.235 | 32.0566 | |
43
+ | 4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 57.0808 | 22.6791 | |
44
+ | 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 64.6442 | 21.972 | |
45
+ | 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 94.5099 | 14.6162 | |
46
+ ### Number of GPUs: 4
47
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
48
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
49
+ | 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 42.3398 | 30.2181 | |
50
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 49.089 | 27.7344 | |
51
+ | 8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 180.534 | 7.53804 | |
52
+ | 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 153.411 | 6.46469 | |
53
+ | 4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 58.6287 | 21.9123 | |
54
+ | 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 66.4926 | 21.409 | |
55
+ ### Number of GPUs: 8
56
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
57
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
58
+ | 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 40.4986 | 30.5489 | |
59
+ | 8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 186.713 | 7.23498 | |
60
+ | 4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 60.1828 | 21.9172 | |
61
+ ## Model: h2oai/h2ogpt-4096-llama2-13b-chat (transformers)
62
+ ### Number of GPUs: 1
63
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
64
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
65
+ | 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 52.4984 | 26.2487 | |
66
+ | 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 49.7972 | 24.9301 | |
67
+ | 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 71.9114 | 18.4362 | |
68
+ | 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
69
+ | 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | nan | nan | OOM |
70
+ | 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 168.967 | 7.67522 | |
71
+ | 8 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 185.442 | 6.0205 | |
72
+ | 8 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 174.458 | 5.69269 | |
73
+ | 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | 193.993 | 5.56359 | |
74
+ | 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 280.467 | 3.75936 | |
75
+ | 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 45.3051 | 20.4771 | |
76
+ | 4 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 68.0646 | 16.1241 | |
77
+ | 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 81.1389 | 15.6933 | |
78
+ | 4 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 74.271 | 15.0868 | |
79
+ | 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 96.6189 | 9.77255 | |
80
+ ### Number of GPUs: 2
81
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
82
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
83
+ | 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 51.6428 | 26.1842 | |
84
+ | 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 51.299 | 24.8757 | |
85
+ | 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 72.8565 | 18.2039 | |
86
+ | 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 89.5996 | 12.8295 | |
87
+ | 8 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 167.523 | 7.82793 | |
88
+ | 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 195.929 | 5.51238 | |
89
+ | 8 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 180.781 | 5.43787 | |
90
+ | 8 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 280.831 | 3.72157 | |
91
+ | 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 47.1425 | 19.9791 | |
92
+ | 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 84.5776 | 15.1326 | |
93
+ | 4 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 79.9461 | 14.3455 | |
94
+ | 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 98.4705 | 9.68779 | |
95
+ ### Number of GPUs: 4
96
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
97
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
98
+ | 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 55.3779 | 21.7073 | |
99
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 74.4377 | 17.8537 | |
100
+ | 8 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 179.505 | 5.45185 | |
101
+ | 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 199.799 | 5.39725 | |
102
+ | 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 87.6579 | 14.6779 | |
103
+ | 4 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 78.9061 | 14.6754 | |
104
+ ### Number of GPUs: 8
105
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
106
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
107
+ | 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 55.3965 | 22.302 | |
108
+ | 8 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 185.328 | 5.38647 | |
109
+ | 4 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 83.0479 | 13.969 | |
110
+ ## Model: h2oai/h2ogpt-4096-llama2-70b-chat (transformers)
111
+ ### Number of GPUs: 1
112
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
113
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
114
+ | 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | nan | nan | OOM |
115
+ | 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
116
+ | 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | nan | nan | OOM |
117
+ | 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
118
+ | 8 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | nan | nan | OOM |
119
+ | 8 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
120
+ | 8 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
121
+ | 4 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 122.132 | 10.6495 | |
122
+ | 4 | 1 x NVIDIA RTX A6000 (46068 MiB) | 165.058 | 6.94248 | |
123
+ | 4 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
124
+ ### Number of GPUs: 2
125
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
126
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
127
+ | 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
128
+ | 8 | 2 x NVIDIA RTX A6000 (46068 MiB) | 410.069 | 2.25687 | |
129
+ | 4 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 120.538 | 10.5008 | |
130
+ | 4 | 2 x NVIDIA RTX A6000 (46068 MiB) | 171.744 | 6.71342 | |
131
+ | 4 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
132
+ ### Number of GPUs: 4
133
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
134
+ |-------:|:---------------------------------|---------------------------:|--------------------------------:|:------------|
135
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 267.056 | 4.24242 | |
136
+ | 8 | 4 x NVIDIA RTX A6000 (46068 MiB) | 413.957 | 2.22551 | |
137
+ | 4 | 4 x NVIDIA RTX A6000 (46068 MiB) | 175.491 | 6.5798 | |
138
+ # Backend: text-generation-inference
139
+
140
+ For an [interactive visualization of the results](https://raw.githubusercontent.com/h2oai/h2ogpt/main/benchmarks/llm_gpu_benchmark_text-generation-inference.html), save the linked file as HTML on your machine and open it in a browser.
141
+
142
+
143
+ ## Model: h2oai/h2ogpt-4096-llama2-7b-chat (text-generation-inference)
144
+ ### Number of GPUs: 1
145
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
146
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
147
+ | 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 39.0155 | 55.2139 | |
148
+ | 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | 29.129 | 45.9535 | |
149
+ | 16 | 1 x NVIDIA GeForce RTX 4090 (24564 MiB) | 24.3988 | 44.5878 | |
150
+ | 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 39.2697 | 30.3068 | |
151
+ | 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | 40.3622 | 29.9724 | |
152
+ ### Number of GPUs: 2
153
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
154
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
155
+ | 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 7.63612 | 71.7881 | |
156
+ | 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 41.0461 | 30.3726 | |
157
+ | 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 41.0245 | 29.36 | |
158
+ ### Number of GPUs: 4
159
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
160
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
161
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 42.8377 | 29.388 | |
162
+ | 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 41.0995 | 28.4403 | |
163
+ ### Number of GPUs: 8
164
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
165
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
166
+ | 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 42.8594 | 27.8644 | |
167
+ ## Model: h2oai/h2ogpt-4096-llama2-13b-chat (text-generation-inference)
168
+ ### Number of GPUs: 1
169
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
170
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
171
+ | 16 | 1 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 21.7823 | 33.7132 | |
172
+ | 16 | 1 x NVIDIA A100-SXM4-80GB (81920 MiB) | 51.8428 | 19.083 | |
173
+ | 16 | 1 x NVIDIA GeForce RTX 3090 (24576 MiB) | nan | nan | OOM |
174
+ | 16 | 1 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
175
+ ### Number of GPUs: 2
176
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
177
+ |-------:|:-----------------------------------------------|---------------------------:|--------------------------------:|:------------|
178
+ | 16 | 2 x NVIDIA RTX 6000 Ada Generation (49140 MiB) | 10.8242 | 57.8237 | |
179
+ | 16 | 2 x NVIDIA GeForce RTX 3090 (24576 MiB) | 42.2111 | 31.4247 | |
180
+ | 16 | 2 x NVIDIA A100-SXM4-80GB (81920 MiB) | 53.3837 | 22.223 | |
181
+ | 16 | 2 x NVIDIA RTX A6000 (46068 MiB) | 64.782 | 21.3549 | |
182
+ ### Number of GPUs: 4
183
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
184
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
185
+ | 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 52.7912 | 21.3862 | |
186
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | 66.5247 | 20.777 | |
187
+ ### Number of GPUs: 8
188
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
189
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
190
+ | 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 56.3847 | 20.3764 | |
191
+ ## Model: h2oai/h2ogpt-4096-llama2-70b-chat (text-generation-inference)
192
+ ### Number of GPUs: 4
193
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
194
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
195
+ | 16 | 4 x NVIDIA A100-SXM4-80GB (81920 MiB) | 131.453 | 9.61851 | |
196
+ | 16 | 4 x NVIDIA RTX A6000 (46068 MiB) | nan | nan | OOM |
197
+ ### Number of GPUs: 8
198
+ | bits | gpus | summarization time [sec] | generation speed [tokens/sec] | exception |
199
+ |-------:|:--------------------------------------|---------------------------:|--------------------------------:|:------------|
200
+ | 16 | 8 x NVIDIA A100-SXM4-80GB (81920 MiB) | 133.53 | 9.53011 | |
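+
+ The tables above can be regenerated from the raw records in `benchmarks/perf.json` (one JSON object per line). A minimal sketch, assuming tokens are approximated as output bytes divided by 4 (consistent with the numbers above); the repository's own `benchmarks/llm_gpu_benchmark.py` may differ in detail:
+
+ ```python
+ # Summarize benchmarks/perf.json into tables like the ones above.
+ # Rows from failed runs (e.g. OOM) without timing fields are skipped.
+ import json
+
+ import pandas as pd
+
+ rows = []
+ with open("benchmarks/perf.json") as f:
+     for line in f:
+         r = json.loads(line)
+         if "generate_time" not in r or "summarize_time" not in r:
+             continue
+         rows.append({
+             "backend": r["backend"],
+             "model": r["base_model"],
+             "bits": r["bits"],
+             "gpus": r["gpus"],
+             "summarization time [sec]": r["summarize_time"],
+             "generation speed [tokens/sec]":
+                 r["generate_output_len_bytes"] / 4 / r["generate_time"],
+         })
+
+ df = pd.DataFrame(rows)
+ print(df.sort_values("generation speed [tokens/sec]", ascending=False)
+       .to_markdown(index=False))
+ ```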
blog/README.md ADDED
@@ -0,0 +1,81 @@
1
+ # Building the World's Best Open-Source Large Language Model: H2O.ai's Journey
2
+
3
+ by Arno Candel, PhD, CTO H2O.ai, April 19, 2023
4
+
5
+ At H2O.ai, we pride ourselves on developing world-class Machine Learning, Deep Learning, and AI platforms. We released H2O, the most widely used open-source distributed and scalable machine learning platform, before XGBoost, TensorFlow, and PyTorch existed. H2O.ai is home to over 25 Kaggle grandmasters, including the current #1. In 2017, we used GPUs to create the world's best AutoML in H2O Driverless AI. We have witnessed first-hand how Large Language Models (LLMs) have taken the world by storm.
6
+
7
+ We are proud to announce that we are building h2oGPT, an LLM that not only excels in performance but is also fully open-source and commercially usable, providing a valuable resource for developers, researchers, and organizations worldwide.
8
+
9
+ In this blog, we'll explore our journey of building h2oGPT as part of our effort to further democratize AI.
10
+
11
+ ## Why Open-Source LLMs?
12
+
13
+ While LLMs like OpenAI's ChatGPT/GPT-4, Anthropic's Claude, Microsoft's Bing AI Chat, Google's Bard, and Cohere are powerful and effective, they have certain limitations compared to open-source LLMs:
14
+
15
+ 1. **Data Privacy and Security**: Using hosted LLMs requires sending data to external servers. This can raise concerns about data privacy, security, and compliance, especially for sensitive information or industries with strict regulations.
16
+ 2. **Dependency and Customization**: Hosted LLMs often limit the extent of customization and control, as users rely on the service provider's infrastructure and predefined models. Open-source LLMs allow users to tailor the models to their specific needs, deploy on their own infrastructure, and even modify the underlying code.
17
+ 3. **Cost and Scalability**: Hosted LLMs usually come with usage fees, which can increase significantly with large-scale applications. Open-source LLMs can be more cost-effective, as users can scale the models on their own infrastructure without incurring additional costs from the service provider.
18
+ 4. **Access and Availability**: Hosted LLMs may be subject to downtime or limited availability, affecting users' access to the models. Open-source LLMs can be deployed on-premises or on private clouds, ensuring uninterrupted access and reducing reliance on external providers.
19
+
20
+ Overall, open-source LLMs offer greater flexibility, control, and cost-effectiveness, while addressing data privacy and security concerns. They foster a competitive landscape in the AI industry and empower users to innovate and customize models to suit their specific needs.
21
+
22
+ ## The H2O.ai LLM Ecosystem
23
+
24
+ Our open-source LLM ecosystem currently includes the following components:
25
+
26
+ 1. **Code, data, and models**: Fully permissive, commercially usable [code](https://github.com/h2oai/h2ogpt), curated fine-tuning [data](https://huggingface.co/h2oai), and fine-tuned [models](https://huggingface.co/h2oai) ranging from 7 to 20 billion parameters.
27
+ 2. **State-of-the-art fine-tuning**: We provide code for highly efficient fine-tuning, including targeted data preparation, prompt engineering, and computational optimizations to fine-tune LLMs with up to 20 billion parameters (even larger models expected soon) in hours on commodity hardware or enterprise servers. Techniques like low-rank adaptation (LoRA) and data compression allow computational savings of several orders of magnitude (see the sketch after this list).
28
+ 3. **Chatbot**: We provide code to run a multi-tenant chatbot on GPU servers, with an easily shareable end-point and a Python client API, allowing you to evaluate and compare the performance of fine-tuned LLMs.
29
+ 4. **H2O LLM Studio**: Our no-code LLM fine-tuning framework created by the world's top Kaggle grandmasters makes it even easier to fine-tune and evaluate LLMs.
30
+
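+ To make the LoRA savings in point 2 above concrete, here is a minimal, hypothetical sketch of the idea in PyTorch (h2oGPT builds on existing LoRA implementations; the module and hyperparameters below are purely illustrative):
+
+ ```python
+ # LoRA in a nutshell: freeze the pretrained weight W and learn a
+ # low-rank update B @ A, with rank r much smaller than the layer size.
+ import torch
+ import torch.nn as nn
+
+
+ class LoRALinear(nn.Module):
+     def __init__(self, base: nn.Linear, r: int = 8, alpha: float = 16.0):
+         super().__init__()
+         self.base = base
+         for p in self.base.parameters():  # pretrained weights stay frozen
+             p.requires_grad_(False)
+         self.A = nn.Parameter(torch.randn(r, base.in_features) * 0.01)
+         self.B = nn.Parameter(torch.zeros(base.out_features, r))
+         self.scale = alpha / r
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         # Only A and B are trained, a tiny fraction of the parameters.
+         return self.base(x) + self.scale * (x @ self.A.T @ self.B.T)
+
+
+ layer = LoRALinear(nn.Linear(4096, 4096))
+ trainable = sum(p.numel() for p in layer.parameters() if p.requires_grad)
+ print(f"trainable params: {trainable:,}")  # 65,536 vs ~16.8M frozen
+ ```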
31
+ Everything we release is based on fully permissive data and models, with all code open-sourced, enabling broader access for businesses and commercial products without legal concerns, thus expanding access to cutting-edge AI while adhering to licensing requirements.
32
+
33
+ ## Roadmap and Future Plans
34
+
35
+ We have an ambitious roadmap for our LLM ecosystem, including:
36
+
37
+ 1. Integration with downstream applications and low/no-code platforms (H2O Document AI, H2O LLM Studio, etc.)
38
+ 2. Improved validation and benchmarking frameworks of LLMs
39
+ 3. Complementing our chatbot with search and other APIs (LangChain, etc.)
40
+ 4. Contributing to large-scale data cleaning efforts (Open Assistant, Stability AI, RedPajama, etc.)
41
+ 5. High-performance distributed training of larger models on trillion tokens
42
+ 6. High-performance scalable on-premises hosting for high-throughput endpoints
43
+ 7. Improvements in code completion, reasoning, mathematics, factual correctness, hallucinations, and reducing repetitions
44
+
45
+ ## Getting Started with H2O.ai's LLMs
46
+
47
+ You can [Chat with h2oGPT](https://gpt.h2o.ai/) right now!
48
+
49
+ https://user-images.githubusercontent.com/6147661/232924684-6c0e2dfb-2f24-4098-848a-c3e4396f29f6.mov
50
+
51
+ ![](https://user-images.githubusercontent.com/6147661/233239878-de3b0fce-5425-4189-8095-5313c7817d58.png)
52
+ ![](https://user-images.githubusercontent.com/6147661/233239861-e99f238c-dd5d-4dd7-ac17-6367f91f86ac.png)
53
+
54
+ To start using our LLM as a developer, follow the steps below:
55
+
56
+ 1. Clone the repository: `git clone https://github.com/h2oai/h2ogpt.git`
57
+ 2. Change to the repository directory: `cd h2ogpt`
58
+ 3. Install the requirements: `pip install -r requirements.txt`
59
+ 4. Run the chatbot: `python generate.py --base_model=h2oai/h2ogpt-oig-oasst1-256-6_9b`
60
+ 5. Open your browser at `http://0.0.0.0:7860` or the public live URL printed by the server.
61
+
62
+ For more information, visit [h2oGPT GitHub page](https://github.com/h2oai/h2ogpt), [H2O.ai's Hugging Face page](https://huggingface.co/h2oai) and [H2O LLM Studio GitHub page](https://github.com/h2oai/h2o-llmstudio).
63
+
64
+ Join us on this exciting journey as we continue to improve and expand the capabilities of our open-source LLM ecosystem!
65
+
66
+ ## Acknowledgements
67
+
68
+ We appreciate the work by many open-source contributors, especially:
69
+
70
+ * [H2O.ai makers](https://h2o.ai/company/team/)
71
+ * [Alpaca-LoRA](https://github.com/tloen/alpaca-lora/)
72
+ * [LoRA](https://github.com/microsoft/LoRA/)
73
+ * [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca/)
74
+ * [Hugging Face](https://huggingface.co/)
75
+ * [OpenAssistant](https://open-assistant.io/)
76
+ * [EleutherAI](https://www.eleuther.ai/)
77
+ * [LAION](https://laion.ai/blog/oig-dataset/)
78
+ * [BigScience](https://github.com/bigscience-workshop/bigscience/)
79
+ * [LLaMa](https://github.com/facebookresearch/llama/)
80
+ * [StableLM](https://github.com/Stability-AI/StableLM/)
81
+ * [Vicuna](https://github.com/lm-sys/FastChat/)
ci/jenkinsfile ADDED
@@ -0,0 +1,158 @@
1
+ #!/usr/bin/groovy
2
+
3
+ @Library('test-shared-library@dai_pipeline') _
4
+
5
+ import ai.h2o.ci.buildsummary.StagesSummary
6
+ import groovy.json.JsonOutput
7
+
8
+ buildSummary('https://github.com/h2oai/h2ogpt', true)
9
+ buildSummary.get().addStagesSummary(this, new StagesSummary())
10
+
11
+ def ALL_TESTS = [
12
+ "test_osx": [
13
+ install_deps: "TRAINING",
14
+ test_target: "test_imports",
15
+ node: "osx",
16
+ test_markers: "not need_tokens and not need_gpu",
17
+ timeout: 90,
18
+ use_docker: false,
19
+ env: ['PYTHON_BINARY=/Users/jenkins/anaconda/envs/h2ogpt-py3.10/bin/python']
20
+ ],
21
+ "test_all": [
22
+ install_deps: "TRAINING,WIKI_EXTRA",
23
+ test_target: "test",
24
+ test_markers: "not need_tokens and not need_gpu",
25
+ node: "DAIDEV-GPU || DAIDEV-2GPU",
26
+ timeout: 90,
27
+ use_docker: true,
28
+ env: []
29
+ ],
30
+ ]
31
+
32
+ pipeline {
33
+ agent none
34
+ parameters {
35
+ booleanParam(name: 'skipTesting', defaultValue: false, description: 'Skip testing')
36
+ text(name: "testTargets", defaultValue: "${ALL_TESTS.keySet().join('\n')}", description: "A select set of tests to run")
37
+ booleanParam(name: 'publish', defaultValue: false, description: 'Upload to HF')
38
+ }
39
+ options {
40
+ ansiColor('xterm')
41
+ timestamps()
42
+ }
43
+ stages {
44
+ stage('Build') {
45
+ agent {
46
+ label "linux && docker"
47
+ }
48
+ steps {
49
+ script {
50
+ def shortHash = sh(returnStdout: true, script: 'git rev-parse --short HEAD').trim()
51
+ def commitMsg = sh(returnStdout: true, script: 'git log -1 --pretty=format:"[%an] %s"').trim()
52
+ currentBuild.displayName = "${env.BUILD_ID} - [${shortHash}]"
53
+ currentBuild.description = "${commitMsg}"
54
+
55
+ sh "make docker_build"
56
+ docker.image("harbor.h2o.ai/library/python:3.10").inside("--entrypoint='' --security-opt seccomp=unconfined -e USE_WHEEL=1 -e HOME=${WORKSPACE}") {
57
+ sh "make clean dist"
58
+ }
59
+
60
+ archiveArtifacts allowEmptyArchive: true, artifacts: "dist/h2ogpt-*.whl"
61
+ stash includes: "dist/h2ogpt-*.whl", name: "wheel_file"
62
+ }
63
+ }
64
+ }
65
+
66
+ stage('Tests') {
67
+ when {
68
+ anyOf {
69
+ expression { return !params.skipTesting }
70
+ }
71
+ beforeAgent true
72
+ }
73
+ agent {
74
+ label "linux && docker"
75
+ }
76
+ steps {
77
+ script {
78
+ def testTargets = [:]
79
+ params.testTargets.split('\n').findAll{ it.contains("test_") }.each { testName ->
80
+ testTargets[testName] = {
81
+ node("${ALL_TESTS[testName].node}") {
82
+ buildSummary.stageWithSummary("${testName}", "${testName}") {
83
+ buildSummary.setStageUrl("${testName}")
84
+ timeout(time: ALL_TESTS[testName].timeout, unit: 'MINUTES') {
85
+ script {
86
+ try {
87
+ dir("${testName}") {
88
+ withEnv(ALL_TESTS[testName].env + ["PYTEST_TEST_NAME=_${testName}", "IS_PR_BUILD=${isPrBranch()}", "USE_WHEEL=1"]) {
89
+
90
+ // cleanup and force the use of the installed wheel
91
+ deleteDir()
92
+ checkout scm
93
+ unstash "wheel_file"
94
+ sh "rm -rf *.py spaces models"
95
+
96
+ // pull runtime details
97
+ def dockerImage = sh(returnStdout: true, script: "make print-DOCKER_TEST_IMAGE").trim()
98
+ def nvidiaSmiExitCode = sh(returnStdout: false, returnStatus: true, script: "nvidia-smi")
99
+ // def dockerRuntime = "${nvidiaSmiExitCode}" == "0" ? "--runtime nvidia" : ""
100
+ def dockerRuntime = "" // TODO: keep until lab machines are upgraded
101
+
102
+ if (ALL_TESTS[testName].use_docker) {
103
+ docker.image("${dockerImage}").inside("--entrypoint='' --security-opt seccomp=unconfined --ulimit core=-1 --init --pid=host -e USE_WHEEL=1 -e HOME=${WORKSPACE}/${testName} ${dockerRuntime}") {
104
+ sh "nvidia-smi || true"
105
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install"
106
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make install-${ALL_TESTS[testName].install_deps}"
107
+ sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=/usr/bin/python3.10 make ${ALL_TESTS[testName].test_target}"""
108
+ }
109
+ } else {
110
+ sh "make venv"
111
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install"
112
+ sh "SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make install-${ALL_TESTS[testName].install_deps}"
113
+ sh """DEFAULT_MARKERS="${ALL_TESTS[testName].test_markers}" SKIP_MANUAL_TESTS=1 PYTHON_BINARY=${WORKSPACE}/${testName}/venv/bin/python make ${ALL_TESTS[testName].test_target}"""
114
+ }
115
+ }
116
+ }
117
+ } catch (e) {
118
+ throw e
119
+ } finally {
120
+ sh "mv ${testName}/test_report.xml ${testName}/${testName}_report.xml"
121
+ archiveArtifacts allowEmptyArchive: true, artifacts: "${testName}/${testName}_report.xml"
122
+ junit testResults: "${testName}/${testName}_report.xml", keepLongStdio: true, allowEmptyResults: true
123
+ }
124
+ }
125
+ }
126
+ }
127
+ }
128
+ }
129
+ }
130
+
131
+ parallel(testTargets)
132
+ }
133
+ }
134
+ }
135
+
136
+ stage('Publish') {
137
+ when {
138
+ anyOf {
139
+ expression { return params.publish }
140
+ }
141
+ beforeAgent true
142
+ }
143
+ agent {
144
+ label "linux && docker"
145
+ }
146
+ steps {
147
+ script {
148
+ sh "make IS_PR_BUILD=${isPrBranch()} BUILD_NUMBER=${env.BUILD_ID} BUILD_BASE_NAME=${env.JOB_BASE_NAME} publish"
149
+ }
150
+ }
151
+ }
152
+ }
153
+ }
154
+
155
+ def isPrBranch() {
156
+ return (env.CHANGE_BRANCH != null && env.CHANGE_BRANCH != '') ||
157
+ (env.BRANCH_NAME != null && env.BRANCH_NAME.startsWith("PR-"))
158
+ }
client/.gitignore ADDED
@@ -0,0 +1,168 @@
1
+ ### Copied files ###
2
+ h2ogpt_client/_h2ogpt_*.py
3
+
4
+ ### Poetry ###
5
+ .poetry
6
+ poetry
7
+
8
+ ### Python template
9
+ # Byte-compiled / optimized / DLL files
10
+ __pycache__/
11
+ *.py[cod]
12
+ *$py.class
13
+
14
+ # C extensions
15
+ *.so
16
+
17
+ # Distribution / packaging
18
+ .Python
19
+ build/
20
+ develop-eggs/
21
+ dist/
22
+ downloads/
23
+ eggs/
24
+ .eggs/
25
+ lib/
26
+ lib64/
27
+ parts/
28
+ sdist/
29
+ var/
30
+ wheels/
31
+ share/python-wheels/
32
+ *.egg-info/
33
+ .installed.cfg
34
+ *.egg
35
+ MANIFEST
36
+
37
+ # PyInstaller
38
+ # Usually these files are written by a python script from a template
39
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
40
+ *.manifest
41
+ *.spec
42
+
43
+ # Installer logs
44
+ pip-log.txt
45
+ pip-delete-this-directory.txt
46
+
47
+ # Unit test / coverage reports
48
+ htmlcov/
49
+ .tox/
50
+ .nox/
51
+ .coverage
52
+ .coverage.*
53
+ .cache
54
+ nosetests.xml
55
+ coverage.xml
56
+ *.cover
57
+ *.py,cover
58
+ .hypothesis/
59
+ .pytest_cache/
60
+ cover/
61
+
62
+ # Translations
63
+ *.mo
64
+ *.pot
65
+
66
+ # Django stuff:
67
+ *.log
68
+ local_settings.py
69
+ db.sqlite3
70
+ db.sqlite3-journal
71
+
72
+ # Flask stuff:
73
+ instance/
74
+ .webassets-cache
75
+
76
+ # Scrapy stuff:
77
+ .scrapy
78
+
79
+ # Sphinx documentation
80
+ docs/_build/
81
+
82
+ # PyBuilder
83
+ .pybuilder/
84
+ target/
85
+
86
+ # Jupyter Notebook
87
+ .ipynb_checkpoints
88
+
89
+ # IPython
90
+ profile_default/
91
+ ipython_config.py
92
+
93
+ # pyenv
94
+ # For a library or package, you might want to ignore these files since the code is
95
+ # intended to run in multiple environments; otherwise, check them in:
96
+ # .python-version
97
+
98
+ # pipenv
99
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
101
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
102
+ # install all needed dependencies.
103
+ #Pipfile.lock
104
+
105
+ # poetry
106
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
107
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
108
+ # commonly ignored for libraries.
109
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
110
+ #poetry.lock
111
+
112
+ # pdm
113
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
114
+ #pdm.lock
115
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
116
+ # in version control.
117
+ # https://pdm.fming.dev/#use-with-ide
118
+ .pdm.toml
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ .idea/
client/Makefile ADDED
@@ -0,0 +1,58 @@
1
+ POETRY_INSTALL_DIR := $(abspath ./.poetry)
2
+ POETRY_BIN := $(POETRY_INSTALL_DIR)/bin/poetry
3
+
4
+ PACKAGE_NAME = $(firstword $(shell $(POETRY_BIN) version))
5
+ PACKAGE_DIR = $(subst -,_,$(PACKAGE_NAME))
6
+ PACKAGE_VERSION = $(shell $(POETRY_BIN) version --short)
7
+
8
+ # Space-separated list of file paths that need to be copied from h2oGPT.
9
+ FILES_FROM_H2OGPT := enums.py
10
+ NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT = _h2ogpt_
11
+
12
+ $(POETRY_BIN):
13
+ @echo "Installing Poetry into '$(POETRY_INSTALL_DIR)' ..."
14
+ curl -sSL https://install.python-poetry.org | POETRY_HOME="$(POETRY_INSTALL_DIR)" python3 - --force --version 1.5.1
15
+
16
+ .PHONY: copy_files_from_h2ogpt
17
+ copy_files_from_h2ogpt:
18
+ for file in $(FILES_FROM_H2OGPT); do \
19
+ dst="$(PACKAGE_DIR)/$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)$(notdir $$file)"; \
20
+ echo "Copying '$$file' to '$$dst' ..."; \
21
+ cp -f "./../src/$$file" "$$dst"; \
22
+ done
23
+
24
+ .PHONY: clean
25
+ clean:
26
+ rm -rf dist
27
+ find "$(PACKAGE_DIR)" -name "$(NAME_PREFIX_FOR_FILES_COPIED_FROM_H2OGPT)*" -delete
28
+
29
+ .PHONY: clean_deep
30
+ clean_deep: clean
31
+ rm -rf "$(POETRY_INSTALL_DIR)"
32
+ rm -rf ".venv"
33
+
34
+ .PHONY: setup
35
+ setup: $(POETRY_BIN)
36
+ $(POETRY_BIN) install
37
+
38
+ .PHONY: setup_test
39
+ setup_test:
40
+ $(POETRY_BIN) install --only=test
41
+
42
+ .PHONY: lint
43
+ lint: copy_files_from_h2ogpt
44
+ $(POETRY_BIN) run black .
45
+ $(POETRY_BIN) run isort .
46
+ $(POETRY_BIN) run flake8 "$(PACKAGE_DIR)" "tests" || true
47
+ $(POETRY_BIN) run mypy --show-error-codes --pretty .
48
+
49
+ .PHONY: test
50
+ test: copy_files_from_h2ogpt
51
+ $(POETRY_BIN) run pytest -r=A
52
+
53
+ .PHONY: build
54
+ build: copy_files_from_h2ogpt
55
+ $(POETRY_BIN) build
56
+
57
+ print-%:
58
+ @echo $($*)
client/README.md ADDED
@@ -0,0 +1,107 @@
1
+ # h2oGPT Client
2
+ A thin Python client for h2oGPT.
3
+
4
+ ## Prerequisites
5
+ - Python 3.8+
6
+
7
+ If you don't have Python 3.8 on your system, you can use [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html).
8
+ ```bash
9
+ conda create -n h2ogpt_client_build -y
10
+ conda activate h2ogpt_client_build
11
+ conda install python=3.8 -y
12
+ ```
13
+
14
+ ## Download Client Wheel
15
+
16
+ Install the latest nightly wheel from S3.
17
+
18
+ ```bash
19
+ pip install https://s3.amazonaws.com/artifacts.h2o.ai/snapshots/ai/h2o/h2ogpt_client/latest-nightly/h2ogpt_client-0.1.0-py3-none-any.whl
20
+ ```
21
+
22
+ Nightly releases can also be found [here](https://github.com/h2oai/h2ogpt/releases).
23
+
24
+ ## Build Client Wheel
25
+
26
+ If you want to build a fresh wheel from the main branch instead of using the nightly build, follow these instructions.
27
+
28
+ ### Setup
29
+ :information_source: [Poetry](https://python-poetry.org) is used as the build tool.
30
+ ```shell
31
+ rm -rf client/.poetry/
32
+ make -C client setup
33
+ ```
34
+
35
+ ### Build
36
+ ```shell
37
+ make -C client build
38
+ ```
39
+ The distribution wheel file can be found in the `client/dist` directory. This wheel can be installed in the primary h2oGPT environment or any other environment, e.g.
40
+ ```bash
41
+ pip uninstall -y h2ogpt_client
42
+ pip install client/dist/h2ogpt_client-*-py3-none-any.whl
43
+ ```
44
+
45
+ ## Usage
46
+
47
+ Based upon the [test code](tests/test_client.py), specifically the `test_readme_example` test:
48
+ ```python
49
+
50
+
51
+ def test_readme_example(local_server):
52
+ import os
53
+ import asyncio
54
+ from h2ogpt_client import Client
55
+
56
+ if local_server:
57
+ client = Client("http://0.0.0.0:7860")
58
+ else:
59
+ h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
60
+ if h2ogpt_key is None:
61
+ return
62
+ # if you have API key for public instance:
63
+ client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
64
+
65
+ # Text completion
66
+ text_completion = client.text_completion.create()
67
+ response = asyncio.run(text_completion.complete("Hello world"))
68
+ print("asyncio text completion response: %s" % response)
69
+ # Text completion: synchronous
70
+ response = text_completion.complete_sync("Hello world")
71
+ print("sync text completion response: %s" % response)
72
+
73
+ # Chat completion
74
+ chat_completion = client.chat_completion.create()
75
+ reply = asyncio.run(chat_completion.chat("Hey!"))
76
+ print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
77
+ chat_history = chat_completion.chat_history()
78
+ print("chat_history: %s" % chat_history)
79
+ # Chat completion: synchronous
80
+ reply = chat_completion.chat_sync("Hey!")
81
+ print("sync chat completion gpt: %s" % reply["gpt"])
82
+
83
+ test_readme_example(local_server=True)
84
+ ```
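+
+ Streaming is also supported via the `enable_streaming` flag of `complete`/`complete_sync` (see `_completion.py`); a minimal sketch, reusing the `client` created above:
+
+ ```python
+ text_completion = client.text_completion.create()
+ # Synchronous streaming: iterate over reply chunks as they arrive
+ for chunk in text_completion.complete_sync("Hello world", enable_streaming=True):
+     print(chunk, end="", flush=True)
+ ```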
85
+ :warning: **Note**: Client APIs are still evolving. Hence, APIs can change without prior warning.
86
+
87
+ ## Development Guide
88
+
89
+ ### Test
90
+
91
+ In an h2oGPT environment with the client installed, you can run tests that exercise both the client and the server.
92
+
93
+ ### Test with h2oGPT env
94
+ 1. Install test dependencies of the Client into the h2oGPT Python environment.
95
+ ```shell
96
+ make -C client setup_test
97
+ ```
98
+ 2. Run the tests with h2oGPT.
99
+ ```shell
100
+ pytest client/tests/
101
+ ```
102
+
103
+ #### Test with an existing h2oGPT server
104
+ If you already have a running h2oGPT server, then set the `H2OGPT_SERVER` environment variable to use it for testing.
105
+ ```shell
106
+ make H2OGPT_SERVER="http://0.0.0.0:7860" -C client test
107
+ ```
client/h2ogpt_client/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from h2ogpt_client._core import Client
2
+ from h2ogpt_client._h2ogpt_enums import LangChainMode, PromptType
3
+
4
+ __all__ = ["Client", "PromptType", "LangChainMode"]
client/h2ogpt_client/_completion.py ADDED
@@ -0,0 +1,507 @@
1
+ import abc
2
+ import ast
3
+ import collections
4
+ from typing import (
5
+ Any,
6
+ AsyncGenerator,
7
+ Dict,
8
+ Generator,
9
+ List,
10
+ Optional,
11
+ OrderedDict,
12
+ Union,
13
+ )
14
+
15
+ from h2ogpt_client._gradio_client import GradioClientWrapper
16
+ from h2ogpt_client._h2ogpt_enums import (
17
+ DocumentSubset,
18
+ LangChainAction,
19
+ LangChainMode,
20
+ PromptType,
21
+ )
22
+ from h2ogpt_client._models import Model
23
+
24
+ _H2OGPT_PARAMETERS_TO_CLIENT = collections.OrderedDict(
25
+ instruction="instruction",
26
+ iinput="input",
27
+ context="system_pre_context",
28
+ stream_output="stream_output",
29
+ prompt_type="prompt_type",
30
+ prompt_dict="prompt_dict",
31
+ temperature="temperature",
32
+ top_p="top_p",
33
+ top_k="top_k",
34
+ penalty_alpha="penalty_alpha",
35
+ num_beams="beams",
36
+ max_new_tokens="max_output_length",
37
+ min_new_tokens="min_output_length",
38
+ early_stopping="early_stopping",
39
+ max_time="max_time",
40
+ repetition_penalty="repetition_penalty",
41
+ num_return_sequences="number_returns",
42
+ do_sample="enable_sampler",
43
+ chat="chat",
44
+ instruction_nochat="instruction_nochat",
45
+ iinput_nochat="input_context_for_instruction",
46
+ langchain_mode="langchain_mode",
47
+ add_chat_history_to_context="add_chat_history_to_context",
48
+ langchain_action="langchain_action",
49
+ langchain_agents="langchain_agents",
50
+ top_k_docs="langchain_top_k_docs",
51
+ chunk="langchain_enable_chunk",
52
+ chunk_size="langchain_chunk_size",
53
+ document_subset="langchain_document_subset",
54
+ document_choice="langchain_document_choice",
55
+ document_source_substrings="langchain_document_source_substrings",
56
+ document_source_substrings_op="langchain_document_source_substrings_op",
57
+ document_content_substrings="langchain_document_content_substrings",
58
+ document_content_substrings_op="langchain_document_content_substrings_op",
59
+ pre_prompt_query="pre_prompt_query",
60
+ prompt_query="prompt_query",
61
+ pre_prompt_summary="pre_prompt_summary",
62
+ prompt_summary="prompt_summary",
63
+ hyde_llm_prompt="hyde_llm_prompt",
64
+ system_prompt="system_prompt",
65
+ image_audio_loaders="image_audio_loaders",
66
+ pdf_loaders="pdf_loaders",
67
+ url_loaders="url_loaders",
68
+ jq_schema="jq_schema",
69
+ visible_models="model",
70
+ h2ogpt_key="h2ogpt_key",
71
+ add_search_to_context="add_search_to_context",
72
+ chat_conversation="chat_conversation",
73
+ text_context_list="text_context_list",
74
+ docs_ordering_type="docs_ordering_type",
75
+ min_max_new_tokens="min_max_new_tokens",
76
+ max_input_tokens="max_input_tokens",
77
+ max_total_input_tokens="max_total_input_tokens",
78
+ docs_token_handling="docs_token_handling",
79
+ docs_joiner="docs_joiner",
80
+ hyde_level="hyde_level",
81
+ hyde_template="hyde_template",
82
+ hyde_show_only_final="hyde_show_only_final",
83
+ doc_json_mode="doc_json_mode",
84
+ chatbot_role="chatbot_role",
85
+ speaker="speaker",
86
+ tts_language="tts_language",
87
+ tts_speed="tts_speed",
88
+ )
89
+
90
+
91
+ def _to_h2ogpt_params(client_params: Dict[str, Any]) -> OrderedDict[str, Any]:
92
+ """Convert given params to the order of params in h2oGPT."""
93
+
94
+ h2ogpt_params: OrderedDict[str, Any] = collections.OrderedDict()
95
+ for h2ogpt_param_name, client_param_name in _H2OGPT_PARAMETERS_TO_CLIENT.items():
96
+ if client_param_name in client_params:
97
+ h2ogpt_params[h2ogpt_param_name] = client_params[client_param_name]
98
+ return h2ogpt_params
99
+
100
+
101
+ _DEFAULT_PARAMETERS: Dict[str, Any] = dict(
102
+ instruction="",
103
+ input="",
104
+ system_pre_context="",
105
+ stream_output=False,
106
+ prompt_type=PromptType.plain.value,
107
+ prompt_dict="", # empty as prompt_type cannot be 'custom'
108
+ temperature=0.1,
109
+ top_p=1.0,
110
+ top_k=40,
111
+ penalty_alpha=0.0,
112
+ beams=1.0,
113
+ max_output_length=1024,
114
+ min_output_length=0,
115
+ early_stopping=False,
116
+ max_time=360,
117
+ repetition_penalty=1.07,
118
+ number_returns=1,
119
+ enable_sampler=False,
120
+ chat=False,
121
+ instruction_nochat="",
122
+ input_context_for_instruction="",
123
+ langchain_mode=LangChainMode.DISABLED.value,
124
+ add_chat_history_to_context=False, # relevant only for the UI
125
+ langchain_action=LangChainAction.QUERY.value,
126
+ langchain_agents=[],
127
+ langchain_top_k_docs=4, # langchain: number of document chunks
128
+ langchain_enable_chunk=True, # langchain: whether to chunk documents
129
+ langchain_chunk_size=512, # langchain: chunk size for document chunking
130
+ langchain_document_subset=DocumentSubset.Relevant.name,
131
+ langchain_document_choice=[],
132
+ langchain_document_source_substrings=[],
133
+ langchain_document_source_substrings_op='and',
134
+ langchain_document_content_substrings=[],
135
+ langchain_document_content_substrings_op='and',
136
+ pre_prompt_query=[],
137
+ prompt_query="",
138
+ pre_prompt_summary="",
139
+ prompt_summary="",
140
+ hyde_llm_prompt="",
141
+ system_prompt="",
142
+ image_audio_loaders=[],
143
+ pdf_loaders=[],
144
+ url_loaders=[],
145
+ jq_schema=".[]",
146
+ model=None,
147
+ h2ogpt_key=None,
148
+ add_search_to_context=False,
149
+ chat_conversation=None,
150
+ text_context_list=[],
151
+ docs_ordering_type="reverse_ucurve_sort",
152
+ min_max_new_tokens=256,
153
+ max_input_tokens=-1,
154
+ max_total_input_tokens=-1,
155
+ docs_token_handling="split_or_merge",
156
+ docs_joiner="\n\n",
157
+ hyde_level=0,
158
+ hyde_template=None,
159
+ hyde_show_only_final=None,
160
+ doc_json_mode=False,
161
+ chatbot_role="None",
162
+ speaker="None",
163
+ tts_language="autodetect",
164
+ tts_speed=1.0,
165
+ )
166
+
167
+
168
+ class _Completion(abc.ABC):
169
+ _API_NAME = "/submit_nochat_api"
170
+
171
+ def __init__(self, client: GradioClientWrapper, parameters: OrderedDict[str, Any]):
172
+ self._client = client
173
+ self._parameters = dict(parameters)
174
+
175
+ def _get_parameters(self, prompt: str) -> Dict[str, Any]:
176
+ self._parameters["instruction_nochat"] = prompt
177
+ return self._parameters
178
+
179
+ @staticmethod
180
+ def _get_reply(response: str) -> str:
181
+ return ast.literal_eval(response)["response"]
182
+
183
+ def _predict(self, prompt: str) -> str:
184
+ response = self._client.predict(
185
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
186
+ )
187
+ return self._get_reply(response)
188
+
189
+ def _predict_and_stream(self, prompt: str) -> Generator[str, None, None]:
190
+ generator = self._client.predict_and_stream(
191
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
192
+ )
193
+ reply_size_so_far = 0
194
+ for response in generator:
195
+ current_reply = self._get_reply(response)
196
+ new_reply_chunk = current_reply[reply_size_so_far:]
197
+ if not new_reply_chunk:
198
+ continue
199
+ reply_size_so_far += len(new_reply_chunk)
200
+ yield new_reply_chunk
201
+
202
+ async def _submit(self, prompt: str) -> str:
203
+ response = await self._client.submit(
204
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
205
+ )
206
+ return self._get_reply(response)
207
+
208
+ async def _submit_and_stream(self, prompt: str) -> AsyncGenerator[str, None]:
209
+ generator = self._client.submit_and_stream(
210
+ str(self._get_parameters(prompt)), api_name=self._API_NAME
211
+ )
212
+ reply_size_so_far = 0
213
+ async for response in generator:
214
+ current_reply = self._get_reply(response)
215
+ new_reply_chunk = current_reply[reply_size_so_far:]
216
+ if not new_reply_chunk:
217
+ continue
218
+ reply_size_so_far += len(new_reply_chunk)
219
+ yield new_reply_chunk
220
+
221
+
222
+ class TextCompletionCreator:
223
+ """Builder that can create text completions."""
224
+
225
+ def __init__(self, client: GradioClientWrapper):
226
+ self._client = client
227
+
228
+ def create(
229
+ self,
230
+ model: Union[None, Model, str] = None,
231
+ prompt_type: PromptType = PromptType.plain,
232
+ input_context_for_instruction: str = "",
233
+ enable_sampler=False,
234
+ temperature: float = 0.1,
235
+ top_p: float = 1.0,
236
+ top_k: int = 40,
237
+ penalty_alpha: float = 0.0,
238
+ beams: float = 1.0,
239
+ early_stopping: bool = False,
240
+ min_output_length: int = 0,
241
+ max_output_length: int = 1024,
242
+ max_time: int = 360,
243
+ repetition_penalty: float = 1.07,
244
+ number_returns: int = 1,
245
+ system_pre_context: str = "",
246
+ langchain_mode: LangChainMode = LangChainMode.DISABLED,
247
+ system_prompt: str = "",
248
+ add_search_to_context: bool = False,
249
+ text_context_list: List[str] = [],
250
+ docs_ordering_type: str = "reverse_ucurve_sort",
251
+ min_max_new_tokens: int = 256,
252
+ max_input_tokens: int = -1,
253
+ max_total_input_tokens: int = -1,
254
+ docs_token_handling: str = "split_or_merge",
255
+ docs_joiner: str = "\n\n",
256
+ hyde_level: int = 0,
257
+ hyde_template: Optional[str] = None,
258
+ hyde_show_only_final: bool = False,
259
+ doc_json_mode: bool = False,
260
+ chatbot_role="None",
261
+ speaker="None",
262
+ tts_language="autodetect",
263
+ tts_speed=1.0,
264
+ ) -> "TextCompletion":
265
+ """
266
+ Creates a new text completion.
267
+
268
+ :param model: model to be used, `None` means use the default model.
269
+ :param prompt_type: type of the prompt
270
+ :param input_context_for_instruction: input context for instruction
271
+ :param enable_sampler: enable or disable the sampler, required for use of
272
+ temperature, top_p, top_k
273
+ :param temperature: What sampling temperature to use, between 0 and 3.
274
+ Lower values will make it more focused and deterministic, but may lead
275
+ to repeat. Higher values will make the output more creative, but may
276
+ lead to hallucinations.
277
+ :param top_p: cumulative probability of tokens to sample from
278
+ :param top_k: number of tokens to sample from
279
+ :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
280
+ :param beams: Number of searches for optimal overall probability.
281
+ Higher values use more GPU memory and compute.
282
+ :param early_stopping: whether to stop early or not in beam search
283
+ :param min_output_length: minimum output length
284
+ :param max_output_length: maximum output length
285
+ :param max_time: maximum time to search optimal output
286
+ :param repetition_penalty: penalty for repetition
287
+ :param number_returns:
288
+ :param system_pre_context: directly pre-appended without prompt processing
289
+ :param langchain_mode: LangChain mode
290
+ :param system_prompt: Universal system prompt to override prompt_type's system
291
+ prompt
292
+ If 'None', 'auto', or None is passed, then the automatic per-model value is used
293
+ :param add_search_to_context: Whether to add web search of query to context
294
+ :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
295
+ :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
296
+ :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
297
+ :param max_input_tokens: Max input tokens to place into model context for each LLM call
298
+ -1 means auto, fully fill context for query, and fill by original document chunk for summarization
299
+ >=0 means use that to limit context filling to that many tokens
300
+ :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
301
+ :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
302
+ or top_k_docs original document chunks summarization
303
+ None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
304
+ :param docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
305
+ :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
306
+ 0: No HYDE
307
+ 1: Use non-document-based LLM response and original query for embedding query
308
+ 2: Use document-based LLM response and original query for embedding query
309
+ 3+: Continue iterations of embedding prior answer and getting new response
310
+ :param hyde_template:
311
+ None, 'None', or 'auto' uses the internal value and enables it
312
+ '{query}' is the minimal template one can pass
313
+ :param hyde_show_only_final: See h2oGPT server docs
314
+ :param doc_json_mode: whether to give JSON to LLM and get JSON response back
315
+ :param chatbot_role: See h2oGPT server docs
316
+ :param speaker: See h2oGPT server docs
317
+ :param tts_language: See h2oGPT server docs
318
+ :param tts_speed: See h2oGPT server docs
319
+ """
320
+ args = locals().copy()
321
+ args["prompt_type"] = prompt_type.value # convert to serializable type
322
+ args["langchain_mode"] = langchain_mode.value # convert to serializable type
323
+ params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
324
+ params["instruction_nochat"] = None # future prompt
325
+ params["h2ogpt_key"] = self._client.h2ogpt_key
326
+ return TextCompletion(self._client, params)
327
+
328
+
329
+ class TextCompletion(_Completion):
330
+ """Text completion."""
331
+
332
+ async def complete(
333
+ self, prompt: str, enable_streaming: bool = False
334
+ ) -> Union[str, AsyncGenerator[str, None]]:
335
+ """
336
+ Complete this text completion.
337
+
338
+ :param prompt: text prompt to generate completion for
339
+ :param enable_streaming: whether to enable or disable streaming the response
340
+ :return: response from the model
341
+ """
342
+ if enable_streaming:
343
+ params = self._get_parameters(prompt)
344
+ params["stream_output"] = True
345
+ return self._submit_and_stream(prompt)
346
+ else:
347
+ return await self._submit(prompt)
348
+
349
+ def complete_sync(
350
+ self, prompt: str, enable_streaming: bool = False
351
+ ) -> Union[str, Generator[str, None, None]]:
352
+ """
353
+ Complete this text completion synchronously.
354
+
355
+ :param prompt: text prompt to generate completion for
356
+ :param enable_streaming: whether to enable or disable streaming the response
357
+ :return: response from the model
358
+ """
359
+ if enable_streaming:
360
+ params = self._get_parameters(prompt)
361
+ params["stream_output"] = True
362
+ return self._predict_and_stream(prompt)
363
+ else:
364
+ return self._predict(prompt)
365
+
366
+
367
+ class ChatCompletionCreator:
368
+ """Chat completion."""
369
+
370
+ def __init__(self, client: GradioClientWrapper):
371
+ self._client = client
372
+
373
+ def create(
374
+ self,
375
+ model: Union[None, Model, str] = None,
376
+ prompt_type: PromptType = PromptType.plain,
377
+ input_context_for_instruction: str = "",
378
+ enable_sampler=False,
379
+ temperature: float = 0.1,
380
+ top_p: float = 1.0,
381
+ top_k: int = 40,
382
+ penalty_alpha: float = 0.0,
383
+ beams: float = 1.0,
384
+ early_stopping: bool = False,
385
+ min_output_length: int = 0,
386
+ max_output_length: int = 1024,
387
+ max_time: int = 360,
388
+ repetition_penalty: float = 1.07,
389
+ number_returns: int = 1,
390
+ system_pre_context: str = "",
391
+ langchain_mode: LangChainMode = LangChainMode.DISABLED,
392
+ system_prompt: str = "",
393
+ add_search_to_context: bool = False,
394
+ text_context_list: List[str] = [],
395
+ docs_ordering_type: str = "reverse_ucurve_sort",
396
+ min_max_new_tokens: int = 256,
397
+ max_input_tokens: int = -1,
398
+ max_total_input_tokens: int = -1,
399
+ docs_token_handling: str = "split_or_merge",
400
+ docs_joiner: str = "\n\n",
401
+ hyde_level: int = 0,
402
+ hyde_template: Optional[str] = None,
403
+ hyde_show_only_final: bool = False,
404
+ doc_json_mode: bool = False,
405
+ chatbot_role="None",
406
+ speaker="None",
407
+ tts_language="autodetect",
408
+ tts_speed=1.0,
409
+ ) -> "ChatCompletion":
410
+ """
411
+ Creates a new chat completion.
412
+
413
+ :param model: model to be used, `None` means use the default model.
414
+ :param prompt_type: type of the prompt
415
+ :param input_context_for_instruction: input context for instruction
416
+ :param enable_sampler: enable or disable the sampler, required for use of
417
+ temperature, top_p, top_k
418
+ :param temperature: What sampling temperature to use, between 0 and 3.
419
+ Lower values will make it more focused and deterministic, but may lead
420
+ to repeat. Higher values will make the output more creative, but may
421
+ lead to hallucinations.
422
+ :param top_p: cumulative probability of tokens to sample from
423
+ :param top_k: number of tokens to sample from
424
+ :param penalty_alpha: >0 and top_k>1 enable contrastive search (not all models support)
425
+ :param beams: Number of searches for optimal overall probability.
426
+ Higher values use more GPU memory and compute.
427
+ :param early_stopping: whether to stop early or not in beam search
428
+ :param min_output_length: minimum output length
429
+ :param max_output_length: maximum output length
430
+ :param max_time: maximum time to search optimal output
431
+ :param repetition_penalty: penalty for repetition
432
+ :param number_returns:
433
+ :param system_pre_context: directly pre-appended without prompt processing
434
+ :param langchain_mode: LangChain mode
435
+ :param system_prompt: Universal system prompt to override prompt_type's system
436
+ prompt
437
+ :param add_search_to_context: Whether to add web search of query to context
438
+ :param text_context_list: list of strings to use as context (up to allowed max_seq_len of model)
439
+ :param docs_ordering_type: By default uses 'reverse_ucurve_sort' for optimal retrieval
440
+ :param min_max_new_tokens: minimum value for max_new_tokens when auto-adjusting for content of prompt, docs, etc.
441
+ :param max_input_tokens: Max input tokens to place into model context for each LLM call
442
+ -1 means auto, fully fill context for query, and fill by original document chunk for summarization
443
+ >=0 means use that to limit context filling to that many tokens
444
+ :param max_total_input_tokens: like max_input_tokens but instead of per LLM call, applies across all LLM calls for single summarization/extraction action
445
+ :param docs_token_handling: 'chunk' means fill context with top_k_docs (limited by max_input_tokens or model_max_len) chunks for query
446
+ or top_k_docs original document chunks summarization
447
+ None or 'split_or_merge' means same as 'chunk' for query, while for summarization merges documents to fill up to max_input_tokens or model_max_len tokens
448
+ :param docs_joiner: string to join lists of text when doing split_or_merge. None means '\n\n'
449
+ :param hyde_level: HYDE level for HYDE approach (https://arxiv.org/abs/2212.10496)
450
+ 0: No HYDE
451
+ 1: Use non-document-based LLM response and original query for embedding query
452
+ 2: Use document-based LLM response and original query for embedding query
453
+ 3+: Continue iterations of embedding prior answer and getting new response
454
+ :param hyde_template:
455
+ None, 'None', or 'auto' uses the internal value and enables it
456
+ '{query}' is the minimal template one can pass
457
+ :param hyde_show_only_final: See h2oGPT server docs
458
+ :param doc_json_mode: whether to give JSON to LLM and get JSON response back
459
+ :param chatbot_role: See h2oGPT server docs
460
+ :param speaker: See h2oGPT server docs
461
+ :param tts_language: See h2oGPT server docs
462
+ :param tts_speed: See h2oGPT server docs
463
+ """
464
+ args = locals().copy()
465
+ args["prompt_type"] = prompt_type.value # convert to serializable type
466
+ args["langchain_mode"] = langchain_mode.value # convert to serializable type
467
+ params = _to_h2ogpt_params({**_DEFAULT_PARAMETERS, **args})
468
+ params["instruction_nochat"] = None # future prompts
469
+ params["add_chat_history_to_context"] = True
470
+ params["h2ogpt_key"] = self._client.h2ogpt_key
471
+ params["chat_conversation"] = [] # chat history (FIXME: Only works if 1 model?)
472
+ return ChatCompletion(self._client, params)
473
+
474
+
475
+ class ChatCompletion(_Completion):
476
+ """Chat completion."""
477
+
478
+ def _update_history(self, prompt: str, reply: str) -> None:
479
+ self._parameters["chat_conversation"].append((prompt, reply))
480
+
481
+ async def chat(self, prompt: str) -> Dict[str, str]:
482
+ """
483
+ Complete this chat completion asynchronously.
484
+
485
+ :param prompt: text prompt to generate completions for
486
+ :returns: the chat reply
487
+ """
488
+ reply = await self._submit(prompt)
489
+ self._update_history(prompt, reply)
490
+ return {"user": prompt, "gpt": reply}
491
+
492
+ def chat_sync(self, prompt: str) -> Dict[str, str]:
493
+ """
494
+ Complete this chat completion synchronously.
495
+
496
+ :param prompt: text prompt to generate completions for
497
+ :returns: the chat reply
498
+ """
499
+ reply = self._predict(prompt)
500
+ self._update_history(prompt, reply)
501
+ return {"user": prompt, "gpt": reply}
502
+
503
+ def chat_history(self) -> List[Dict[str, str]]:
504
+ """Returns the full chat history."""
505
+ return [
506
+ {"user": i[0], "gpt": i[1]} for i in self._parameters["chat_conversation"]
507
+ ]
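A minimal usage sketch of the chat API above, assuming the package exports Client at the top level and an h2oGPT server is reachable at http://0.0.0.0:7860 (the URL and keyword values shown are assumptions):

from h2ogpt_client import Client

client = Client("http://0.0.0.0:7860")  # assumed local h2oGPT server
# Keyword arguments map onto the parameters documented above.
chat = client.chat_completion.create(enable_sampler=True, temperature=0.5)
turn = chat.chat_sync("Why is the sky blue?")
print(turn["gpt"])                 # the model's reply
chat.chat_sync("And at sunset?")   # chat_conversation carries the history forward
print(chat.chat_history())         # [{"user": ..., "gpt": ...}, ...]

The async chat() variant returns the same {"user": ..., "gpt": ...} mapping and can be awaited inside an event loop.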
client/h2ogpt_client/_core.py ADDED
@@ -0,0 +1,50 @@
1
+ from typing import Optional
2
+
3
+ from h2ogpt_client._completion import ChatCompletionCreator, TextCompletionCreator
4
+ from h2ogpt_client._gradio_client import GradioClientWrapper
5
+ from h2ogpt_client._models import Models
6
+ from h2ogpt_client._server import Server
7
+
8
+
9
+ class Client:
10
+ """h2oGPT Client."""
11
+
12
+ def __init__(
13
+ self,
14
+ src: str,
15
+ h2ogpt_key: Optional[str] = None,
16
+ huggingface_token: Optional[str] = None,
17
+ ):
18
+ """
19
+ Creates an h2oGPT client.
20
+ :param src: either the full URL to the hosted h2oGPT
21
+ (e.g. "http://0.0.0.0:7860", "https://fc752f297207f01c32.gradio.live")
22
+ or the name of the Hugging Face Space to load (e.g. "h2oai/h2ogpt-chatbot")
23
+ :param h2ogpt_key: access key to connect with an h2oGPT server
24
+ :param huggingface_token: Hugging Face token to use to access private Spaces
25
+ """
26
+ self._client = GradioClientWrapper(src, h2ogpt_key, huggingface_token)
27
+ self._text_completion = TextCompletionCreator(self._client)
28
+ self._chat_completion = ChatCompletionCreator(self._client)
29
+ self._models = Models(self._client)
30
+ self._server = Server(self._client)
31
+
32
+ @property
33
+ def text_completion(self) -> TextCompletionCreator:
34
+ """Text completion."""
35
+ return self._text_completion
36
+
37
+ @property
38
+ def chat_completion(self) -> ChatCompletionCreator:
39
+ """Chat completion."""
40
+ return self._chat_completion
41
+
42
+ @property
43
+ def models(self) -> Models:
44
+ """LL models."""
45
+ return self._models
46
+
47
+ @property
48
+ def server(self) -> Server:
49
+ """h2oGPT server."""
50
+ return self._server
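As a sketch, constructing the client and reaching each sub-API (the endpoint URL and key are assumptions):

from h2ogpt_client import Client

client = Client(
    "http://0.0.0.0:7860",       # or a Hugging Face Space name, e.g. "h2oai/h2ogpt-chatbot"
    h2ogpt_key="my-secret-key",  # hypothetical access key; omit if the server is open
)
print(client.text_completion)  # TextCompletionCreator
print(client.chat_completion)  # ChatCompletionCreator
print(client.models)           # Models
print(client.server)           # Server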
client/h2ogpt_client/_gradio_client.py ADDED
@@ -0,0 +1,54 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any, AsyncGenerator, Generator, List, Optional
4
+
5
+ import gradio_client # type: ignore
6
+
7
+
8
+ class GradioClientWrapper:
9
+ def __init__(
10
+ self,
11
+ src: str,
12
+ h2ogpt_key: Optional[str] = None,
13
+ huggingface_token: Optional[str] = None,
14
+ ):
15
+ self._client = gradio_client.Client(
16
+ src=src, hf_token=huggingface_token, serialize=False, verbose=False
17
+ )
18
+ self.h2ogpt_key = h2ogpt_key
19
+
20
+ def predict(self, *args, api_name: str) -> Any:
21
+ return self._client.predict(*args, api_name=api_name)
22
+
23
+ def predict_and_stream(self, *args, api_name: str) -> Generator[str, None, None]:
24
+ job = self._client.submit(*args, api_name=api_name)
25
+ while not job.done():
26
+ outputs: List[str] = job.outputs()
27
+ if not len(outputs):
28
+ time.sleep(0.1)
29
+ continue
30
+ newest_response = outputs[-1]
31
+ yield newest_response
32
+
33
+ e = job.exception()
34
+ if e and isinstance(e, BaseException):
35
+ raise RuntimeError from e
36
+
37
+ async def submit(self, *args, api_name: str) -> Any:
38
+ return await asyncio.wrap_future(self._client.submit(*args, api_name=api_name))
39
+
40
+ async def submit_and_stream(
41
+ self, *args, api_name: str
42
+ ) -> AsyncGenerator[Any, None]:
43
+ job = self._client.submit(*args, api_name=api_name)
44
+ while not job.done():
45
+ outputs: List[str] = job.outputs()
46
+ if not len(outputs):
47
+ await asyncio.sleep(0.1)
48
+ continue
49
+ newest_response = outputs[-1]
50
+ yield newest_response
51
+
52
+ e = job.exception()
53
+ if e and isinstance(e, BaseException):
54
+ raise RuntimeError from e
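Both streaming methods poll the submitted Gradio job, yielding the newest partial output until the job completes. A sketch of consuming the synchronous variant (the server URL and endpoint name are assumptions):

from h2ogpt_client._gradio_client import GradioClientWrapper

wrapper = GradioClientWrapper("http://0.0.0.0:7860")  # assumed local server
# Prints each newest partial response as it arrives; raises RuntimeError on job failure.
for partial in wrapper.predict_and_stream("Hello", api_name="/submit_nochat_api"):
    print(partial)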
client/h2ogpt_client/_models.py ADDED
@@ -0,0 +1,35 @@
1
+ import ast
2
+ from typing import Any, Dict, List
3
+
4
+ from h2ogpt_client._gradio_client import GradioClientWrapper
5
+
6
+
7
+ class Model:
8
+ """Large language model in the h2oGPT server."""
9
+
10
+ def __init__(self, raw_info: Dict[str, Any]):
11
+ self._name = raw_info["base_model"]
12
+ self._raw_info = raw_info
13
+
14
+ @property
15
+ def name(self) -> str:
16
+ """Name of the model."""
17
+ return self._name
18
+
19
+ def __repr__(self) -> str:
20
+ return self.name.__repr__()
21
+
22
+ def __str__(self) -> str:
23
+ return self.name.__str__()
24
+
25
+
26
+ class Models:
27
+ """Interact with LL Models in h2oGPT."""
28
+
29
+ def __init__(self, client: GradioClientWrapper):
30
+ self._client = client
31
+
32
+ def list(self) -> List[Model]:
33
+ """List all models available in the h2oGPT server."""
34
+ models = ast.literal_eval(self._client.predict(api_name="/model_names"))
35
+ return [Model(m) for m in models]
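Listing the models exposed by a server then reads as below (the URL is an assumption):

from h2ogpt_client import Client

client = Client("http://0.0.0.0:7860")  # assumed local server
for model in client.models.list():
    print(model.name)  # base model name parsed from the server's raw info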
client/h2ogpt_client/_server.py ADDED
@@ -0,0 +1,18 @@
1
+ from h2ogpt_client._gradio_client import GradioClientWrapper
2
+
3
+
4
+ class Server:
5
+ """h2oGPT server."""
6
+
7
+ def __init__(self, client: GradioClientWrapper):
8
+ self._client = client
9
+
10
+ @property
11
+ def address(self) -> str:
12
+ """h2oGPT server address."""
13
+ return self._client._client.src
14
+
15
+ @property
16
+ def hash(self) -> str:
17
+ """h2oGPT server system hash."""
18
+ return str(self._client.predict(api_name="/system_hash"))
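And querying the server metadata (the URL is an assumption):

from h2ogpt_client import Client

client = Client("http://0.0.0.0:7860")  # assumed local server
print(client.server.address)  # echoes the src the client was constructed with
print(client.server.hash)     # system hash fetched from the /system_hash endpoint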
client/poetry.lock ADDED
@@ -0,0 +1,856 @@
1
+ # This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
2
+
3
+ [[package]]
4
+ name = "anyio"
5
+ version = "3.6.2"
6
+ description = "High level compatibility layer for multiple asynchronous event loop implementations"
7
+ optional = false
8
+ python-versions = ">=3.6.2"
9
+ files = [
10
+ {file = "anyio-3.6.2-py3-none-any.whl", hash = "sha256:fbbe32bd270d2a2ef3ed1c5d45041250284e31fc0a4df4a5a6071842051a51e3"},
11
+ {file = "anyio-3.6.2.tar.gz", hash = "sha256:25ea0d673ae30af41a0c442f81cf3b38c7e79fdc7b60335a4c14e05eb0947421"},
12
+ ]
13
+
14
+ [package.dependencies]
15
+ idna = ">=2.8"
16
+ sniffio = ">=1.1"
17
+
18
+ [package.extras]
19
+ doc = ["packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"]
20
+ test = ["contextlib2", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (<0.15)", "uvloop (>=0.15)"]
21
+ trio = ["trio (>=0.16,<0.22)"]
22
+
23
+ [[package]]
24
+ name = "attrs"
25
+ version = "23.1.0"
26
+ description = "Classes Without Boilerplate"
27
+ optional = false
28
+ python-versions = ">=3.7"
29
+ files = [
30
+ {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
31
+ {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
32
+ ]
33
+
34
+ [package.extras]
35
+ cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
36
+ dev = ["attrs[docs,tests]", "pre-commit"]
37
+ docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
38
+ tests = ["attrs[tests-no-zope]", "zope-interface"]
39
+ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
40
+
41
+ [[package]]
42
+ name = "black"
43
+ version = "23.3.0"
44
+ description = "The uncompromising code formatter."
45
+ optional = false
46
+ python-versions = ">=3.7"
47
+ files = [
48
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_arm64.whl", hash = "sha256:0945e13506be58bf7db93ee5853243eb368ace1c08a24c65ce108986eac65915"},
49
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_universal2.whl", hash = "sha256:67de8d0c209eb5b330cce2469503de11bca4085880d62f1628bd9972cc3366b9"},
50
+ {file = "black-23.3.0-cp310-cp310-macosx_10_16_x86_64.whl", hash = "sha256:7c3eb7cea23904399866c55826b31c1f55bbcd3890ce22ff70466b907b6775c2"},
51
+ {file = "black-23.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32daa9783106c28815d05b724238e30718f34155653d4d6e125dc7daec8e260c"},
52
+ {file = "black-23.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:35d1381d7a22cc5b2be2f72c7dfdae4072a3336060635718cc7e1ede24221d6c"},
53
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_arm64.whl", hash = "sha256:a8a968125d0a6a404842fa1bf0b349a568634f856aa08ffaff40ae0dfa52e7c6"},
54
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_universal2.whl", hash = "sha256:c7ab5790333c448903c4b721b59c0d80b11fe5e9803d8703e84dcb8da56fec1b"},
55
+ {file = "black-23.3.0-cp311-cp311-macosx_10_16_x86_64.whl", hash = "sha256:a6f6886c9869d4daae2d1715ce34a19bbc4b95006d20ed785ca00fa03cba312d"},
56
+ {file = "black-23.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f3c333ea1dd6771b2d3777482429864f8e258899f6ff05826c3a4fcc5ce3f70"},
57
+ {file = "black-23.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:11c410f71b876f961d1de77b9699ad19f939094c3a677323f43d7a29855fe326"},
58
+ {file = "black-23.3.0-cp37-cp37m-macosx_10_16_x86_64.whl", hash = "sha256:1d06691f1eb8de91cd1b322f21e3bfc9efe0c7ca1f0e1eb1db44ea367dff656b"},
59
+ {file = "black-23.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50cb33cac881766a5cd9913e10ff75b1e8eb71babf4c7104f2e9c52da1fb7de2"},
60
+ {file = "black-23.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:e114420bf26b90d4b9daa597351337762b63039752bdf72bf361364c1aa05925"},
61
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_arm64.whl", hash = "sha256:48f9d345675bb7fbc3dd85821b12487e1b9a75242028adad0333ce36ed2a6d27"},
62
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_universal2.whl", hash = "sha256:714290490c18fb0126baa0fca0a54ee795f7502b44177e1ce7624ba1c00f2331"},
63
+ {file = "black-23.3.0-cp38-cp38-macosx_10_16_x86_64.whl", hash = "sha256:064101748afa12ad2291c2b91c960be28b817c0c7eaa35bec09cc63aa56493c5"},
64
+ {file = "black-23.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:562bd3a70495facf56814293149e51aa1be9931567474993c7942ff7d3533961"},
65
+ {file = "black-23.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:e198cf27888ad6f4ff331ca1c48ffc038848ea9f031a3b40ba36aced7e22f2c8"},
66
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_arm64.whl", hash = "sha256:3238f2aacf827d18d26db07524e44741233ae09a584273aa059066d644ca7b30"},
67
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_universal2.whl", hash = "sha256:f0bd2f4a58d6666500542b26354978218a9babcdc972722f4bf90779524515f3"},
68
+ {file = "black-23.3.0-cp39-cp39-macosx_10_16_x86_64.whl", hash = "sha256:92c543f6854c28a3c7f39f4d9b7694f9a6eb9d3c5e2ece488c327b6e7ea9b266"},
69
+ {file = "black-23.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a150542a204124ed00683f0db1f5cf1c2aaaa9cc3495b7a3b5976fb136090ab"},
70
+ {file = "black-23.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:6b39abdfb402002b8a7d030ccc85cf5afff64ee90fa4c5aebc531e3ad0175ddb"},
71
+ {file = "black-23.3.0-py3-none-any.whl", hash = "sha256:ec751418022185b0c1bb7d7736e6933d40bbb14c14a0abcf9123d1b159f98dd4"},
72
+ {file = "black-23.3.0.tar.gz", hash = "sha256:1c7b8d606e728a41ea1ccbd7264677e494e87cf630e399262ced92d4a8dac940"},
73
+ ]
74
+
75
+ [package.dependencies]
76
+ click = ">=8.0.0"
77
+ mypy-extensions = ">=0.4.3"
78
+ packaging = ">=22.0"
79
+ pathspec = ">=0.9.0"
80
+ platformdirs = ">=2"
81
+ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
82
+ typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}
83
+
84
+ [package.extras]
85
+ colorama = ["colorama (>=0.4.3)"]
86
+ d = ["aiohttp (>=3.7.4)"]
87
+ jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
88
+ uvloop = ["uvloop (>=0.15.2)"]
89
+
90
+ [[package]]
91
+ name = "certifi"
92
+ version = "2023.5.7"
93
+ description = "Python package for providing Mozilla's CA Bundle."
94
+ optional = false
95
+ python-versions = ">=3.6"
96
+ files = [
97
+ {file = "certifi-2023.5.7-py3-none-any.whl", hash = "sha256:c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716"},
98
+ {file = "certifi-2023.5.7.tar.gz", hash = "sha256:0f0d56dc5a6ad56fd4ba36484d6cc34451e1c6548c61daad8c320169f91eddc7"},
99
+ ]
100
+
101
+ [[package]]
102
+ name = "charset-normalizer"
103
+ version = "3.1.0"
104
+ description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
105
+ optional = false
106
+ python-versions = ">=3.7.0"
107
+ files = [
108
+ {file = "charset-normalizer-3.1.0.tar.gz", hash = "sha256:34e0a2f9c370eb95597aae63bf85eb5e96826d81e3dcf88b8886012906f509b5"},
109
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e0ac8959c929593fee38da1c2b64ee9778733cdf03c482c9ff1d508b6b593b2b"},
110
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d7fc3fca01da18fbabe4625d64bb612b533533ed10045a2ac3dd194bfa656b60"},
111
+ {file = "charset_normalizer-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:04eefcee095f58eaabe6dc3cc2262f3bcd776d2c67005880894f447b3f2cb9c1"},
112
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20064ead0717cf9a73a6d1e779b23d149b53daf971169289ed2ed43a71e8d3b0"},
113
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1435ae15108b1cb6fffbcea2af3d468683b7afed0169ad718451f8db5d1aff6f"},
114
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c84132a54c750fda57729d1e2599bb598f5fa0344085dbde5003ba429a4798c0"},
115
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f2568b4189dda1c567339b48cba4ac7384accb9c2a7ed655cd86b04055c795"},
116
+ {file = "charset_normalizer-3.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11d3bcb7be35e7b1bba2c23beedac81ee893ac9871d0ba79effc7fc01167db6c"},
117
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:891cf9b48776b5c61c700b55a598621fdb7b1e301a550365571e9624f270c203"},
118
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:5f008525e02908b20e04707a4f704cd286d94718f48bb33edddc7d7b584dddc1"},
119
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:b06f0d3bf045158d2fb8837c5785fe9ff9b8c93358be64461a1089f5da983137"},
120
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:49919f8400b5e49e961f320c735388ee686a62327e773fa5b3ce6721f7e785ce"},
121
+ {file = "charset_normalizer-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:22908891a380d50738e1f978667536f6c6b526a2064156203d418f4856d6e86a"},
122
+ {file = "charset_normalizer-3.1.0-cp310-cp310-win32.whl", hash = "sha256:12d1a39aa6b8c6f6248bb54550efcc1c38ce0d8096a146638fd4738e42284448"},
123
+ {file = "charset_normalizer-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:65ed923f84a6844de5fd29726b888e58c62820e0769b76565480e1fdc3d062f8"},
124
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a3267620866c9d17b959a84dd0bd2d45719b817245e49371ead79ed4f710d19"},
125
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6734e606355834f13445b6adc38b53c0fd45f1a56a9ba06c2058f86893ae8017"},
126
+ {file = "charset_normalizer-3.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f8303414c7b03f794347ad062c0516cee0e15f7a612abd0ce1e25caf6ceb47df"},
127
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aaf53a6cebad0eae578f062c7d462155eada9c172bd8c4d250b8c1d8eb7f916a"},
128
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3dc5b6a8ecfdc5748a7e429782598e4f17ef378e3e272eeb1340ea57c9109f41"},
129
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e1b25e3ad6c909f398df8921780d6a3d120d8c09466720226fc621605b6f92b1"},
130
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62"},
131
+ {file = "charset_normalizer-3.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b82fab78e0b1329e183a65260581de4375f619167478dddab510c6c6fb04d9b6"},
132
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:bd7163182133c0c7701b25e604cf1611c0d87712e56e88e7ee5d72deab3e76b5"},
133
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:11d117e6c63e8f495412d37e7dc2e2fff09c34b2d09dbe2bee3c6229577818be"},
134
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:cf6511efa4801b9b38dc5546d7547d5b5c6ef4b081c60b23e4d941d0eba9cbeb"},
135
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:abc1185d79f47c0a7aaf7e2412a0eb2c03b724581139193d2d82b3ad8cbb00ac"},
136
+ {file = "charset_normalizer-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cb7b2ab0188829593b9de646545175547a70d9a6e2b63bf2cd87a0a391599324"},
137
+ {file = "charset_normalizer-3.1.0-cp311-cp311-win32.whl", hash = "sha256:c36bcbc0d5174a80d6cccf43a0ecaca44e81d25be4b7f90f0ed7bcfbb5a00909"},
138
+ {file = "charset_normalizer-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:cca4def576f47a09a943666b8f829606bcb17e2bc2d5911a46c8f8da45f56755"},
139
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:0c95f12b74681e9ae127728f7e5409cbbef9cd914d5896ef238cc779b8152373"},
140
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fca62a8301b605b954ad2e9c3666f9d97f63872aa4efcae5492baca2056b74ab"},
141
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0aa6cd53ab9a31d397f8303f92c42f534693528fafbdb997c82bae6e477ad9"},
142
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3af8e0f07399d3176b179f2e2634c3ce9c1301379a6b8c9c9aeecd481da494f"},
143
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a5fc78f9e3f501a1614a98f7c54d3969f3ad9bba8ba3d9b438c3bc5d047dd28"},
144
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:628c985afb2c7d27a4800bfb609e03985aaecb42f955049957814e0491d4006d"},
145
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:74db0052d985cf37fa111828d0dd230776ac99c740e1a758ad99094be4f1803d"},
146
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1e8fcdd8f672a1c4fc8d0bd3a2b576b152d2a349782d1eb0f6b8e52e9954731d"},
147
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:04afa6387e2b282cf78ff3dbce20f0cc071c12dc8f685bd40960cc68644cfea6"},
148
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:dd5653e67b149503c68c4018bf07e42eeed6b4e956b24c00ccdf93ac79cdff84"},
149
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d2686f91611f9e17f4548dbf050e75b079bbc2a82be565832bc8ea9047b61c8c"},
150
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:4155b51ae05ed47199dc5b2a4e62abccb274cee6b01da5b895099b61b1982974"},
151
+ {file = "charset_normalizer-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:322102cdf1ab682ecc7d9b1c5eed4ec59657a65e1c146a0da342b78f4112db23"},
152
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:e633940f28c1e913615fd624fcdd72fdba807bf53ea6925d6a588e84e1151531"},
153
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3a06f32c9634a8705f4ca9946d667609f52cf130d5548881401f1eb2c39b1e2c"},
154
+ {file = "charset_normalizer-3.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7381c66e0561c5757ffe616af869b916c8b4e42b367ab29fedc98481d1e74e14"},
155
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3573d376454d956553c356df45bb824262c397c6e26ce43e8203c4c540ee0acb"},
156
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e89df2958e5159b811af9ff0f92614dabf4ff617c03a4c1c6ff53bf1c399e0e1"},
157
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78cacd03e79d009d95635e7d6ff12c21eb89b894c354bd2b2ed0b4763373693b"},
158
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5695a6f1d8340b12a5d6d4484290ee74d61e467c39ff03b39e30df62cf83a0"},
159
+ {file = "charset_normalizer-3.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c60b9c202d00052183c9be85e5eaf18a4ada0a47d188a83c8f5c5b23252f649"},
160
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f645caaf0008bacf349875a974220f1f1da349c5dbe7c4ec93048cdc785a3326"},
161
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:ea9f9c6034ea2d93d9147818f17c2a0860d41b71c38b9ce4d55f21b6f9165a11"},
162
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:80d1543d58bd3d6c271b66abf454d437a438dff01c3e62fdbcd68f2a11310d4b"},
163
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:73dc03a6a7e30b7edc5b01b601e53e7fc924b04e1835e8e407c12c037e81adbd"},
164
+ {file = "charset_normalizer-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6f5c2e7bc8a4bf7c426599765b1bd33217ec84023033672c1e9a8b35eaeaaaf8"},
165
+ {file = "charset_normalizer-3.1.0-cp38-cp38-win32.whl", hash = "sha256:12a2b561af122e3d94cdb97fe6fb2bb2b82cef0cdca131646fdb940a1eda04f0"},
166
+ {file = "charset_normalizer-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3160a0fd9754aab7d47f95a6b63ab355388d890163eb03b2d2b87ab0a30cfa59"},
167
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:38e812a197bf8e71a59fe55b757a84c1f946d0ac114acafaafaf21667a7e169e"},
168
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6baf0baf0d5d265fa7944feb9f7451cc316bfe30e8df1a61b1bb08577c554f31"},
169
+ {file = "charset_normalizer-3.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8f25e17ab3039b05f762b0a55ae0b3632b2e073d9c8fc88e89aca31a6198e88f"},
170
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3747443b6a904001473370d7810aa19c3a180ccd52a7157aacc264a5ac79265e"},
171
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b116502087ce8a6b7a5f1814568ccbd0e9f6cfd99948aa59b0e241dc57cf739f"},
172
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d16fd5252f883eb074ca55cb622bc0bee49b979ae4e8639fff6ca3ff44f9f854"},
173
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706"},
174
+ {file = "charset_normalizer-3.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f6c7a8a57e9405cad7485f4c9d3172ae486cfef1344b5ddd8e5239582d7355e"},
175
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ac3775e3311661d4adace3697a52ac0bab17edd166087d493b52d4f4f553f9f0"},
176
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:10c93628d7497c81686e8e5e557aafa78f230cd9e77dd0c40032ef90c18f2230"},
177
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:6f4f4668e1831850ebcc2fd0b1cd11721947b6dc7c00bf1c6bd3c929ae14f2c7"},
178
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0be65ccf618c1e7ac9b849c315cc2e8a8751d9cfdaa43027d4f6624bd587ab7e"},
179
+ {file = "charset_normalizer-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:53d0a3fa5f8af98a1e261de6a3943ca631c526635eb5817a87a59d9a57ebf48f"},
180
+ {file = "charset_normalizer-3.1.0-cp39-cp39-win32.whl", hash = "sha256:a04f86f41a8916fe45ac5024ec477f41f886b3c435da2d4e3d2709b22ab02af1"},
181
+ {file = "charset_normalizer-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:830d2948a5ec37c386d3170c483063798d7879037492540f10a475e3fd6f244b"},
182
+ {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
183
+ ]
184
+
185
+ [[package]]
186
+ name = "click"
187
+ version = "8.1.3"
188
+ description = "Composable command line interface toolkit"
189
+ optional = false
190
+ python-versions = ">=3.7"
191
+ files = [
192
+ {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"},
193
+ {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"},
194
+ ]
195
+
196
+ [package.dependencies]
197
+ colorama = {version = "*", markers = "platform_system == \"Windows\""}
198
+
199
+ [[package]]
200
+ name = "colorama"
201
+ version = "0.4.6"
202
+ description = "Cross-platform colored terminal text."
203
+ optional = false
204
+ python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
205
+ files = [
206
+ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
207
+ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
208
+ ]
209
+
210
+ [[package]]
211
+ name = "exceptiongroup"
212
+ version = "1.1.1"
213
+ description = "Backport of PEP 654 (exception groups)"
214
+ optional = false
215
+ python-versions = ">=3.7"
216
+ files = [
217
+ {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"},
218
+ {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"},
219
+ ]
220
+
221
+ [package.extras]
222
+ test = ["pytest (>=6)"]
223
+
224
+ [[package]]
225
+ name = "filelock"
226
+ version = "3.12.0"
227
+ description = "A platform independent file lock."
228
+ optional = false
229
+ python-versions = ">=3.7"
230
+ files = [
231
+ {file = "filelock-3.12.0-py3-none-any.whl", hash = "sha256:ad98852315c2ab702aeb628412cbf7e95b7ce8c3bf9565670b4eaecf1db370a9"},
232
+ {file = "filelock-3.12.0.tar.gz", hash = "sha256:fc03ae43288c013d2ea83c8597001b1129db351aad9c57fe2409327916b8e718"},
233
+ ]
234
+
235
+ [package.extras]
236
+ docs = ["furo (>=2023.3.27)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
237
+ testing = ["covdefaults (>=2.3)", "coverage (>=7.2.3)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"]
238
+
239
+ [[package]]
240
+ name = "flake8"
241
+ version = "5.0.4"
242
+ description = "the modular source code checker: pep8 pyflakes and co"
243
+ optional = false
244
+ python-versions = ">=3.6.1"
245
+ files = [
246
+ {file = "flake8-5.0.4-py2.py3-none-any.whl", hash = "sha256:7a1cf6b73744f5806ab95e526f6f0d8c01c66d7bbe349562d22dfca20610b248"},
247
+ {file = "flake8-5.0.4.tar.gz", hash = "sha256:6fbe320aad8d6b95cec8b8e47bc933004678dc63095be98528b7bdd2a9f510db"},
248
+ ]
249
+
250
+ [package.dependencies]
251
+ mccabe = ">=0.7.0,<0.8.0"
252
+ pycodestyle = ">=2.9.0,<2.10.0"
253
+ pyflakes = ">=2.5.0,<2.6.0"
254
+
255
+ [[package]]
256
+ name = "flake8-pyproject"
257
+ version = "1.2.3"
258
+ description = "Flake8 plug-in loading the configuration from pyproject.toml"
259
+ optional = false
260
+ python-versions = ">= 3.6"
261
+ files = [
262
+ {file = "flake8_pyproject-1.2.3-py3-none-any.whl", hash = "sha256:6249fe53545205af5e76837644dc80b4c10037e73a0e5db87ff562d75fb5bd4a"},
263
+ ]
264
+
265
+ [package.dependencies]
266
+ Flake8 = ">=5"
267
+ TOMLi = {version = "*", markers = "python_version < \"3.11\""}
268
+
269
+ [package.extras]
270
+ dev = ["pyTest", "pyTest-cov"]
271
+
272
+ [[package]]
273
+ name = "fsspec"
274
+ version = "2023.5.0"
275
+ description = "File-system specification"
276
+ optional = false
277
+ python-versions = ">=3.8"
278
+ files = [
279
+ {file = "fsspec-2023.5.0-py3-none-any.whl", hash = "sha256:51a4ad01a5bb66fcc58036e288c0d53d3975a0df2a5dc59a93b59bade0391f2a"},
280
+ {file = "fsspec-2023.5.0.tar.gz", hash = "sha256:b3b56e00fb93ea321bc9e5d9cf6f8522a0198b20eb24e02774d329e9c6fb84ce"},
281
+ ]
282
+
283
+ [package.extras]
284
+ abfs = ["adlfs"]
285
+ adl = ["adlfs"]
286
+ arrow = ["pyarrow (>=1)"]
287
+ dask = ["dask", "distributed"]
288
+ devel = ["pytest", "pytest-cov"]
289
+ dropbox = ["dropbox", "dropboxdrivefs", "requests"]
290
+ full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
291
+ fuse = ["fusepy"]
292
+ gcs = ["gcsfs"]
293
+ git = ["pygit2"]
294
+ github = ["requests"]
295
+ gs = ["gcsfs"]
296
+ gui = ["panel"]
297
+ hdfs = ["pyarrow (>=1)"]
298
+ http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
299
+ libarchive = ["libarchive-c"]
300
+ oci = ["ocifs"]
301
+ s3 = ["s3fs"]
302
+ sftp = ["paramiko"]
303
+ smb = ["smbprotocol"]
304
+ ssh = ["paramiko"]
305
+ tqdm = ["tqdm"]
306
+
307
+ [[package]]
308
+ name = "gradio-client"
309
+ version = "0.6.1"
310
+ description = "Python library for easily interacting with trained machine learning models"
311
+ optional = false
312
+ python-versions = ">=3.8"
313
+ files = [
314
+ {file = "gradio_client-0.6.1-py3-none-any.whl", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
315
+ {file = "gradio_client-0.6.1.tar.gz", hash = "sha256:2f36a8467622f748539c84d142f1a71df6b83666d017ee4fb6ef8a2696f70f66"},
316
+ ]
317
+
318
+ [package.dependencies]
319
+ fsspec = "*"
320
+ httpx = "*"
321
+ huggingface-hub = ">=0.13.0"
322
+ packaging = "*"
323
+ requests = ">=2.0,<3.0"
324
+ typing-extensions = ">=4.0,<5.0"
325
+ websockets = ">=10.0,<12.0"
326
+
327
+ [[package]]
328
+ name = "h11"
329
+ version = "0.14.0"
330
+ description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
331
+ optional = false
332
+ python-versions = ">=3.7"
333
+ files = [
334
+ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"},
335
+ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
336
+ ]
337
+
338
+ [[package]]
339
+ name = "httpcore"
340
+ version = "0.17.0"
341
+ description = "A minimal low-level HTTP client."
342
+ optional = false
343
+ python-versions = ">=3.7"
344
+ files = [
345
+ {file = "httpcore-0.17.0-py3-none-any.whl", hash = "sha256:0fdfea45e94f0c9fd96eab9286077f9ff788dd186635ae61b312693e4d943599"},
346
+ {file = "httpcore-0.17.0.tar.gz", hash = "sha256:cc045a3241afbf60ce056202301b4d8b6af08845e3294055eb26b09913ef903c"},
347
+ ]
348
+
349
+ [package.dependencies]
350
+ anyio = ">=3.0,<5.0"
351
+ certifi = "*"
352
+ h11 = ">=0.13,<0.15"
353
+ sniffio = "==1.*"
354
+
355
+ [package.extras]
356
+ http2 = ["h2 (>=3,<5)"]
357
+ socks = ["socksio (==1.*)"]
358
+
359
+ [[package]]
360
+ name = "httpx"
361
+ version = "0.24.0"
362
+ description = "The next generation HTTP client."
363
+ optional = false
364
+ python-versions = ">=3.7"
365
+ files = [
366
+ {file = "httpx-0.24.0-py3-none-any.whl", hash = "sha256:447556b50c1921c351ea54b4fe79d91b724ed2b027462ab9a329465d147d5a4e"},
367
+ {file = "httpx-0.24.0.tar.gz", hash = "sha256:507d676fc3e26110d41df7d35ebd8b3b8585052450f4097401c9be59d928c63e"},
368
+ ]
369
+
370
+ [package.dependencies]
371
+ certifi = "*"
372
+ httpcore = ">=0.15.0,<0.18.0"
373
+ idna = "*"
374
+ sniffio = "*"
375
+
376
+ [package.extras]
377
+ brotli = ["brotli", "brotlicffi"]
378
+ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
379
+ http2 = ["h2 (>=3,<5)"]
380
+ socks = ["socksio (==1.*)"]
381
+
382
+ [[package]]
383
+ name = "huggingface-hub"
384
+ version = "0.16.4"
385
+ description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
386
+ optional = false
387
+ python-versions = ">=3.7.0"
388
+ files = [
389
+ {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
390
+ {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"},
391
+ ]
392
+
393
+ [package.dependencies]
394
+ filelock = "*"
395
+ fsspec = "*"
396
+ packaging = ">=20.9"
397
+ pyyaml = ">=5.1"
398
+ requests = "*"
399
+ tqdm = ">=4.42.1"
400
+ typing-extensions = ">=3.7.4.3"
401
+
402
+ [package.extras]
403
+ all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
404
+ cli = ["InquirerPy (==0.3.4)"]
405
+ dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
406
+ fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
407
+ quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"]
408
+ tensorflow = ["graphviz", "pydot", "tensorflow"]
409
+ testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "gradio", "jedi", "pytest", "pytest-cov", "pytest-env", "pytest-xdist", "soundfile"]
410
+ torch = ["torch"]
411
+ typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"]
412
+
413
+ [[package]]
414
+ name = "idna"
415
+ version = "3.4"
416
+ description = "Internationalized Domain Names in Applications (IDNA)"
417
+ optional = false
418
+ python-versions = ">=3.5"
419
+ files = [
420
+ {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
421
+ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
422
+ ]
423
+
424
+ [[package]]
425
+ name = "iniconfig"
426
+ version = "2.0.0"
427
+ description = "brain-dead simple config-ini parsing"
428
+ optional = false
429
+ python-versions = ">=3.7"
430
+ files = [
431
+ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"},
432
+ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
433
+ ]
434
+
435
+ [[package]]
436
+ name = "isort"
437
+ version = "5.12.0"
438
+ description = "A Python utility / library to sort Python imports."
439
+ optional = false
440
+ python-versions = ">=3.8.0"
441
+ files = [
442
+ {file = "isort-5.12.0-py3-none-any.whl", hash = "sha256:f84c2818376e66cf843d497486ea8fed8700b340f308f076c6fb1229dff318b6"},
443
+ {file = "isort-5.12.0.tar.gz", hash = "sha256:8bef7dde241278824a6d83f44a544709b065191b95b6e50894bdc722fcba0504"},
444
+ ]
445
+
446
+ [package.extras]
447
+ colors = ["colorama (>=0.4.3)"]
448
+ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib"]
449
+ plugins = ["setuptools"]
450
+ requirements-deprecated-finder = ["pip-api", "pipreqs"]
451
+
452
+ [[package]]
453
+ name = "mccabe"
454
+ version = "0.7.0"
455
+ description = "McCabe checker, plugin for flake8"
456
+ optional = false
457
+ python-versions = ">=3.6"
458
+ files = [
459
+ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"},
460
+ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
461
+ ]
462
+
463
+ [[package]]
464
+ name = "mypy"
465
+ version = "1.3.0"
466
+ description = "Optional static typing for Python"
467
+ optional = false
468
+ python-versions = ">=3.7"
469
+ files = [
470
+ {file = "mypy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eb485cea53f4f5284e5baf92902cd0088b24984f4209e25981cc359d64448d"},
471
+ {file = "mypy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4c99c3ecf223cf2952638da9cd82793d8f3c0c5fa8b6ae2b2d9ed1e1ff51ba85"},
472
+ {file = "mypy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:550a8b3a19bb6589679a7c3c31f64312e7ff482a816c96e0cecec9ad3a7564dd"},
473
+ {file = "mypy-1.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cbc07246253b9e3d7d74c9ff948cd0fd7a71afcc2b77c7f0a59c26e9395cb152"},
474
+ {file = "mypy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:a22435632710a4fcf8acf86cbd0d69f68ac389a3892cb23fbad176d1cddaf228"},
475
+ {file = "mypy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6e33bb8b2613614a33dff70565f4c803f889ebd2f859466e42b46e1df76018dd"},
476
+ {file = "mypy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7d23370d2a6b7a71dc65d1266f9a34e4cde9e8e21511322415db4b26f46f6b8c"},
477
+ {file = "mypy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:658fe7b674769a0770d4b26cb4d6f005e88a442fe82446f020be8e5f5efb2fae"},
478
+ {file = "mypy-1.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6e42d29e324cdda61daaec2336c42512e59c7c375340bd202efa1fe0f7b8f8ca"},
479
+ {file = "mypy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:d0b6c62206e04061e27009481cb0ec966f7d6172b5b936f3ead3d74f29fe3dcf"},
480
+ {file = "mypy-1.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:76ec771e2342f1b558c36d49900dfe81d140361dd0d2df6cd71b3db1be155409"},
481
+ {file = "mypy-1.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebc95f8386314272bbc817026f8ce8f4f0d2ef7ae44f947c4664efac9adec929"},
482
+ {file = "mypy-1.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:faff86aa10c1aa4a10e1a301de160f3d8fc8703b88c7e98de46b531ff1276a9a"},
483
+ {file = "mypy-1.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8c5979d0deb27e0f4479bee18ea0f83732a893e81b78e62e2dda3e7e518c92ee"},
484
+ {file = "mypy-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c5d2cc54175bab47011b09688b418db71403aefad07cbcd62d44010543fc143f"},
485
+ {file = "mypy-1.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:87df44954c31d86df96c8bd6e80dfcd773473e877ac6176a8e29898bfb3501cb"},
486
+ {file = "mypy-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:473117e310febe632ddf10e745a355714e771ffe534f06db40702775056614c4"},
487
+ {file = "mypy-1.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:74bc9b6e0e79808bf8678d7678b2ae3736ea72d56eede3820bd3849823e7f305"},
488
+ {file = "mypy-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:44797d031a41516fcf5cbfa652265bb994e53e51994c1bd649ffcd0c3a7eccbf"},
489
+ {file = "mypy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ddae0f39ca146972ff6bb4399f3b2943884a774b8771ea0a8f50e971f5ea5ba8"},
490
+ {file = "mypy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c4c42c60a8103ead4c1c060ac3cdd3ff01e18fddce6f1016e08939647a0e703"},
491
+ {file = "mypy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86c2c6852f62f8f2b24cb7a613ebe8e0c7dc1402c61d36a609174f63e0ff017"},
492
+ {file = "mypy-1.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f9dca1e257d4cc129517779226753dbefb4f2266c4eaad610fc15c6a7e14283e"},
493
+ {file = "mypy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:95d8d31a7713510685b05fbb18d6ac287a56c8f6554d88c19e73f724a445448a"},
494
+ {file = "mypy-1.3.0-py3-none-any.whl", hash = "sha256:a8763e72d5d9574d45ce5881962bc8e9046bf7b375b0abf031f3e6811732a897"},
495
+ {file = "mypy-1.3.0.tar.gz", hash = "sha256:e1f4d16e296f5135624b34e8fb741eb0eadedca90862405b1f1fde2040b9bd11"},
496
+ ]
497
+
498
+ [package.dependencies]
499
+ mypy-extensions = ">=1.0.0"
500
+ tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
501
+ typing-extensions = ">=3.10"
502
+
503
+ [package.extras]
504
+ dmypy = ["psutil (>=4.0)"]
505
+ install-types = ["pip"]
506
+ python2 = ["typed-ast (>=1.4.0,<2)"]
507
+ reports = ["lxml"]
508
+
509
+ [[package]]
510
+ name = "mypy-extensions"
511
+ version = "1.0.0"
512
+ description = "Type system extensions for programs checked with the mypy type checker."
513
+ optional = false
514
+ python-versions = ">=3.5"
515
+ files = [
516
+ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
517
+ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
518
+ ]
519
+
520
+ [[package]]
521
+ name = "packaging"
522
+ version = "23.1"
523
+ description = "Core utilities for Python packages"
524
+ optional = false
525
+ python-versions = ">=3.7"
526
+ files = [
527
+ {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"},
528
+ {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"},
529
+ ]
530
+
531
+ [[package]]
532
+ name = "pathspec"
533
+ version = "0.11.1"
534
+ description = "Utility library for gitignore style pattern matching of file paths."
535
+ optional = false
536
+ python-versions = ">=3.7"
537
+ files = [
538
+ {file = "pathspec-0.11.1-py3-none-any.whl", hash = "sha256:d8af70af76652554bd134c22b3e8a1cc46ed7d91edcdd721ef1a0c51a84a5293"},
539
+ {file = "pathspec-0.11.1.tar.gz", hash = "sha256:2798de800fa92780e33acca925945e9a19a133b715067cf165b8866c15a31687"},
540
+ ]
541
+
542
+ [[package]]
543
+ name = "platformdirs"
544
+ version = "3.5.0"
545
+ description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
546
+ optional = false
547
+ python-versions = ">=3.7"
548
+ files = [
549
+ {file = "platformdirs-3.5.0-py3-none-any.whl", hash = "sha256:47692bc24c1958e8b0f13dd727307cff1db103fca36399f457da8e05f222fdc4"},
550
+ {file = "platformdirs-3.5.0.tar.gz", hash = "sha256:7954a68d0ba23558d753f73437c55f89027cf8f5108c19844d4b82e5af396335"},
551
+ ]
552
+
553
+ [package.extras]
554
+ docs = ["furo (>=2023.3.27)", "proselint (>=0.13)", "sphinx (>=6.1.3)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"]
555
+ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4)", "pytest-mock (>=3.10)"]
556
+
557
+ [[package]]
558
+ name = "pluggy"
559
+ version = "1.0.0"
560
+ description = "plugin and hook calling mechanisms for python"
561
+ optional = false
562
+ python-versions = ">=3.6"
563
+ files = [
564
+ {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"},
565
+ {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"},
566
+ ]
567
+
568
+ [package.extras]
569
+ dev = ["pre-commit", "tox"]
570
+ testing = ["pytest", "pytest-benchmark"]
571
+
572
+ [[package]]
573
+ name = "pycodestyle"
574
+ version = "2.9.1"
575
+ description = "Python style guide checker"
576
+ optional = false
577
+ python-versions = ">=3.6"
578
+ files = [
579
+ {file = "pycodestyle-2.9.1-py2.py3-none-any.whl", hash = "sha256:d1735fc58b418fd7c5f658d28d943854f8a849b01a5d0a1e6f3f3fdd0166804b"},
580
+ {file = "pycodestyle-2.9.1.tar.gz", hash = "sha256:2c9607871d58c76354b697b42f5d57e1ada7d261c261efac224b664affdc5785"},
581
+ ]
582
+
583
+ [[package]]
584
+ name = "pyflakes"
585
+ version = "2.5.0"
586
+ description = "passive checker of Python programs"
587
+ optional = false
588
+ python-versions = ">=3.6"
589
+ files = [
590
+ {file = "pyflakes-2.5.0-py2.py3-none-any.whl", hash = "sha256:4579f67d887f804e67edb544428f264b7b24f435b263c4614f384135cea553d2"},
591
+ {file = "pyflakes-2.5.0.tar.gz", hash = "sha256:491feb020dca48ccc562a8c0cbe8df07ee13078df59813b83959cbdada312ea3"},
592
+ ]
593
+
594
+ [[package]]
595
+ name = "pytest"
596
+ version = "7.2.2"
597
+ description = "pytest: simple powerful testing with Python"
598
+ optional = false
599
+ python-versions = ">=3.7"
600
+ files = [
601
+ {file = "pytest-7.2.2-py3-none-any.whl", hash = "sha256:130328f552dcfac0b1cec75c12e3f005619dc5f874f0a06e8ff7263f0ee6225e"},
602
+ {file = "pytest-7.2.2.tar.gz", hash = "sha256:c99ab0c73aceb050f68929bc93af19ab6db0558791c6a0715723abe9d0ade9d4"},
603
+ ]
604
+
605
+ [package.dependencies]
606
+ attrs = ">=19.2.0"
607
+ colorama = {version = "*", markers = "sys_platform == \"win32\""}
608
+ exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""}
609
+ iniconfig = "*"
610
+ packaging = "*"
611
+ pluggy = ">=0.12,<2.0"
612
+ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
613
+
614
+ [package.extras]
615
+ testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
616
+
617
+ [[package]]
618
+ name = "pytest-asyncio"
619
+ version = "0.21.0"
620
+ description = "Pytest support for asyncio"
621
+ optional = false
622
+ python-versions = ">=3.7"
623
+ files = [
624
+ {file = "pytest-asyncio-0.21.0.tar.gz", hash = "sha256:2b38a496aef56f56b0e87557ec313e11e1ab9276fc3863f6a7be0f1d0e415e1b"},
625
+ {file = "pytest_asyncio-0.21.0-py3-none-any.whl", hash = "sha256:f2b3366b7cd501a4056858bd39349d5af19742aed2d81660b7998b6341c7eb9c"},
626
+ ]
627
+
628
+ [package.dependencies]
629
+ pytest = ">=7.0.0"
630
+
631
+ [package.extras]
632
+ docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
633
+ testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"]
634
+
635
+ [[package]]
636
+ name = "pyyaml"
637
+ version = "6.0"
638
+ description = "YAML parser and emitter for Python"
639
+ optional = false
640
+ python-versions = ">=3.6"
641
+ files = [
642
+ {file = "PyYAML-6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d4db7c7aef085872ef65a8fd7d6d09a14ae91f691dec3e87ee5ee0539d516f53"},
643
+ {file = "PyYAML-6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9df7ed3b3d2e0ecfe09e14741b857df43adb5a3ddadc919a2d94fbdf78fea53c"},
644
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f396e6ef4c73fdc33a9157446466f1cff553d979bd00ecb64385760c6babdc"},
645
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a80a78046a72361de73f8f395f1f1e49f956c6be882eed58505a15f3e430962b"},
646
+ {file = "PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f84fbc98b019fef2ee9a1cb3ce93e3187a6df0b2538a651bfb890254ba9f90b5"},
647
+ {file = "PyYAML-6.0-cp310-cp310-win32.whl", hash = "sha256:2cd5df3de48857ed0544b34e2d40e9fac445930039f3cfe4bcc592a1f836d513"},
648
+ {file = "PyYAML-6.0-cp310-cp310-win_amd64.whl", hash = "sha256:daf496c58a8c52083df09b80c860005194014c3698698d1a57cbcfa182142a3a"},
649
+ {file = "PyYAML-6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d4b0ba9512519522b118090257be113b9468d804b19d63c71dbcf4a48fa32358"},
650
+ {file = "PyYAML-6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81957921f441d50af23654aa6c5e5eaf9b06aba7f0a19c18a538dc7ef291c5a1"},
651
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa17f5bc4d1b10afd4466fd3a44dc0e245382deca5b3c353d8b757f9e3ecb8d"},
652
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dbad0e9d368bb989f4515da330b88a057617d16b6a8245084f1b05400f24609f"},
653
+ {file = "PyYAML-6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:432557aa2c09802be39460360ddffd48156e30721f5e8d917f01d31694216782"},
654
+ {file = "PyYAML-6.0-cp311-cp311-win32.whl", hash = "sha256:bfaef573a63ba8923503d27530362590ff4f576c626d86a9fed95822a8255fd7"},
655
+ {file = "PyYAML-6.0-cp311-cp311-win_amd64.whl", hash = "sha256:01b45c0191e6d66c470b6cf1b9531a771a83c1c4208272ead47a3ae4f2f603bf"},
656
+ {file = "PyYAML-6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:897b80890765f037df3403d22bab41627ca8811ae55e9a722fd0392850ec4d86"},
657
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50602afada6d6cbfad699b0c7bb50d5ccffa7e46a3d738092afddc1f9758427f"},
658
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:48c346915c114f5fdb3ead70312bd042a953a8ce5c7106d5bfb1a5254e47da92"},
659
+ {file = "PyYAML-6.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98c4d36e99714e55cfbaaee6dd5badbc9a1ec339ebfc3b1f52e293aee6bb71a4"},
660
+ {file = "PyYAML-6.0-cp36-cp36m-win32.whl", hash = "sha256:0283c35a6a9fbf047493e3a0ce8d79ef5030852c51e9d911a27badfde0605293"},
661
+ {file = "PyYAML-6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:07751360502caac1c067a8132d150cf3d61339af5691fe9e87803040dbc5db57"},
662
+ {file = "PyYAML-6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:819b3830a1543db06c4d4b865e70ded25be52a2e0631ccd2f6a47a2822f2fd7c"},
663
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:473f9edb243cb1935ab5a084eb238d842fb8f404ed2193a915d1784b5a6b5fc0"},
664
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ce82d761c532fe4ec3f87fc45688bdd3a4c1dc5e0b4a19814b9009a29baefd4"},
665
+ {file = "PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:231710d57adfd809ef5d34183b8ed1eeae3f76459c18fb4a0b373ad56bedcdd9"},
666
+ {file = "PyYAML-6.0-cp37-cp37m-win32.whl", hash = "sha256:c5687b8d43cf58545ade1fe3e055f70eac7a5a1a0bf42824308d868289a95737"},
667
+ {file = "PyYAML-6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:d15a181d1ecd0d4270dc32edb46f7cb7733c7c508857278d3d378d14d606db2d"},
668
+ {file = "PyYAML-6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0b4624f379dab24d3725ffde76559cff63d9ec94e1736b556dacdfebe5ab6d4b"},
669
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213c60cd50106436cc818accf5baa1aba61c0189ff610f64f4a3e8c6726218ba"},
670
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fa600030013c4de8165339db93d182b9431076eb98eb40ee068700c9c813e34"},
671
+ {file = "PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:277a0ef2981ca40581a47093e9e2d13b3f1fbbeffae064c1d21bfceba2030287"},
672
+ {file = "PyYAML-6.0-cp38-cp38-win32.whl", hash = "sha256:d4eccecf9adf6fbcc6861a38015c2a64f38b9d94838ac1810a9023a0609e1b78"},
673
+ {file = "PyYAML-6.0-cp38-cp38-win_amd64.whl", hash = "sha256:1e4747bc279b4f613a09eb64bba2ba602d8a6664c6ce6396a4d0cd413a50ce07"},
674
+ {file = "PyYAML-6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:055d937d65826939cb044fc8c9b08889e8c743fdc6a32b33e2390f66013e449b"},
675
+ {file = "PyYAML-6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e61ceaab6f49fb8bdfaa0f92c4b57bcfbea54c09277b1b4f7ac376bfb7a7c174"},
676
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d67d839ede4ed1b28a4e8909735fc992a923cdb84e618544973d7dfc71540803"},
677
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba8c411ef271aa037d7357a2bc8f9ee8b58b9965831d9e51baf703280dc73d3"},
678
+ {file = "PyYAML-6.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:40527857252b61eacd1d9af500c3337ba8deb8fc298940291486c465c8b46ec0"},
679
+ {file = "PyYAML-6.0-cp39-cp39-win32.whl", hash = "sha256:b5b9eccad747aabaaffbc6064800670f0c297e52c12754eb1d976c57e4f74dcb"},
680
+ {file = "PyYAML-6.0-cp39-cp39-win_amd64.whl", hash = "sha256:b3d267842bf12586ba6c734f89d1f5b871df0273157918b0ccefa29deb05c21c"},
681
+ {file = "PyYAML-6.0.tar.gz", hash = "sha256:68fb519c14306fec9720a2a5b45bc9f0c8d1b9c72adf45c37baedfcd949c35a2"},
682
+ ]
683
+
684
+ [[package]]
685
+ name = "requests"
686
+ version = "2.30.0"
687
+ description = "Python HTTP for Humans."
688
+ optional = false
689
+ python-versions = ">=3.7"
690
+ files = [
691
+ {file = "requests-2.30.0-py3-none-any.whl", hash = "sha256:10e94cc4f3121ee6da529d358cdaeaff2f1c409cd377dbc72b825852f2f7e294"},
692
+ {file = "requests-2.30.0.tar.gz", hash = "sha256:239d7d4458afcb28a692cdd298d87542235f4ca8d36d03a15bfc128a6559a2f4"},
693
+ ]
694
+
695
+ [package.dependencies]
696
+ certifi = ">=2017.4.17"
697
+ charset-normalizer = ">=2,<4"
698
+ idna = ">=2.5,<4"
699
+ urllib3 = ">=1.21.1,<3"
700
+
701
+ [package.extras]
702
+ socks = ["PySocks (>=1.5.6,!=1.5.7)"]
703
+ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
704
+
705
+ [[package]]
706
+ name = "sniffio"
707
+ version = "1.3.0"
708
+ description = "Sniff out which async library your code is running under"
709
+ optional = false
710
+ python-versions = ">=3.7"
711
+ files = [
712
+ {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"},
713
+ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
714
+ ]
715
+
716
+ [[package]]
717
+ name = "tomli"
718
+ version = "2.0.1"
719
+ description = "A lil' TOML parser"
720
+ optional = false
721
+ python-versions = ">=3.7"
722
+ files = [
723
+ {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"},
724
+ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"},
725
+ ]
726
+
727
+ [[package]]
728
+ name = "tqdm"
729
+ version = "4.65.0"
730
+ description = "Fast, Extensible Progress Meter"
731
+ optional = false
732
+ python-versions = ">=3.7"
733
+ files = [
734
+ {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"},
735
+ {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"},
736
+ ]
737
+
738
+ [package.dependencies]
739
+ colorama = {version = "*", markers = "platform_system == \"Windows\""}
740
+
741
+ [package.extras]
742
+ dev = ["py-make (>=0.1.0)", "twine", "wheel"]
743
+ notebook = ["ipywidgets (>=6)"]
744
+ slack = ["slack-sdk"]
745
+ telegram = ["requests"]
746
+
747
+ [[package]]
748
+ name = "typing-extensions"
749
+ version = "4.5.0"
750
+ description = "Backported and Experimental Type Hints for Python 3.7+"
751
+ optional = false
752
+ python-versions = ">=3.7"
753
+ files = [
754
+ {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"},
755
+ {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"},
756
+ ]
757
+
758
+ [[package]]
759
+ name = "urllib3"
760
+ version = "1.26.16"
761
+ description = "HTTP library with thread-safe connection pooling, file post, and more."
762
+ optional = false
763
+ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
764
+ files = [
765
+ {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"},
766
+ {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"},
767
+ ]
768
+
769
+ [package.extras]
770
+ brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"]
771
+ secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"]
772
+ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"]
773
+
774
+ [[package]]
775
+ name = "websockets"
776
+ version = "11.0.3"
777
+ description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
778
+ optional = false
779
+ python-versions = ">=3.7"
780
+ files = [
781
+ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"},
782
+ {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"},
783
+ {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"},
784
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"},
785
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"},
786
+ {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"},
787
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"},
788
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"},
789
+ {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"},
790
+ {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"},
791
+ {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"},
792
+ {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"},
793
+ {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"},
794
+ {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"},
795
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"},
796
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"},
797
+ {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"},
798
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"},
799
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"},
800
+ {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"},
801
+ {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"},
802
+ {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"},
803
+ {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"},
804
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"},
805
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"},
806
+ {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"},
807
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"},
808
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"},
809
+ {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"},
810
+ {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = "sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"},
811
+ {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"},
812
+ {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"},
813
+ {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"},
814
+ {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"},
815
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"},
816
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"},
817
+ {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"},
818
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"},
819
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"},
820
+ {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"},
821
+ {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"},
822
+ {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"},
823
+ {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"},
824
+ {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"},
825
+ {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"},
826
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"},
827
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"},
828
+ {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"},
829
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"},
830
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"},
831
+ {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"},
832
+ {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = "sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"},
833
+ {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"},
834
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"},
835
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"},
836
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"},
837
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"},
838
+ {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"},
839
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"},
840
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"},
841
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"},
842
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"},
843
+ {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"},
844
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"},
845
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"},
846
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"},
847
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"},
848
+ {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"},
849
+ {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"},
850
+ {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"},
851
+ ]
852
+
853
+ [metadata]
854
+ lock-version = "2.0"
855
+ python-versions = "^3.8"
856
+ content-hash = "80634bedd72b53e96d00fe6cbad0d9bfbbdda1e017c24f19d6de41d046f566c7"
client/poetry.toml ADDED
@@ -0,0 +1 @@
 
 
1
+ virtualenvs.in-project = true
client/pyproject.toml ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "h2ogpt-client"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = []
6
+ readme = "README.md"
7
+ include = ["h2ogpt_client/_h2ogpt*"]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.8"
11
+ gradio-client = "^0.6.1"
12
+
13
+ [tool.poetry.group.test.dependencies]
14
+ pytest = "7.2.2"
15
+ pytest-asyncio = "^0.21.0"
16
+
17
+ [tool.poetry.group.dev.dependencies]
18
+ mypy = "^1.3.0"
19
+ black = "^23.3.0"
20
+ flake8 = "5.0.4"
21
+ isort = "^5.12.0"
22
+ flake8-pyproject = "^1.2.3"
23
+
24
+ [build-system]
25
+ requires = ["poetry-core"]
26
+ build-backend = "poetry.core.masonry.api"
27
+
28
+ [tool.isort]
29
+ profile = "black"
30
+ py_version = "auto"
31
+
32
+ [tool.flake8]
33
+ max-line-length = 88
34
+
35
+ [tool.mypy]
36
+ python_version = "3.8"
37
+
38
+ [tool.pytest.ini_options]
39
+ pythonpath = "h2ogpt_client"
40
+ log_cli = true
41
+ log_cli_level = "INFO"
client/tests/__init__.py ADDED
File without changes
client/tests/conftest.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.util
2
+ import logging
3
+ import os
4
+ import sys
5
+ from pathlib import Path
6
+ from types import ModuleType
7
+
8
+ import pytest
9
+
10
+ LOGGER = logging.getLogger(__name__)
11
+
12
+
13
+ @pytest.fixture(scope="module")
14
+ def server_url():
15
+ server_url = os.getenv("H2OGPT_SERVER")
16
+ if not server_url:
17
+ LOGGER.info("Couldn't find a running h2oGPT server. Hence starting a one.")
18
+
19
+ generate = _import_module_from_h2ogpt("generate.py")
20
+ generate.main(
21
+ base_model="h2oai/h2ogpt-oig-oasst1-512-6_9b",
22
+ prompt_type="human_bot",
23
+ chat=False,
24
+ stream_output=False,
25
+ gradio=True,
26
+ num_beams=1,
27
+ block_gradio_exit=False,
28
+ )
29
+ server_url = "http://0.0.0.0:7860" # assume server started
30
+ LOGGER.info(f"h2oGPT server started at '{server_url}'.")
31
+ return server_url
32
+
33
+
34
+ @pytest.fixture(scope="module")
35
+ def h2ogpt_key():
36
+ return os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
37
+
38
+
39
+ @pytest.fixture(scope="module")
40
+ def eval_func_param_names():
41
+ parameters = _import_module_from_h2ogpt("src/evaluate_params.py")
42
+ return parameters.eval_func_param_names
43
+
44
+
45
+ def _import_module_from_h2ogpt(file_name: str) -> ModuleType:
46
+ h2ogpt_dir = Path(__file__).parent.parent.parent
47
+ file_path = (h2ogpt_dir / file_name).absolute()
48
+ module_name = file_path.stem
49
+
50
+ LOGGER.info(f"Loading module '{module_name}' from '{file_path}'.")
51
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
52
+ if not spec:
53
+ raise Exception(f"Couldn't load module '{module_name}' from '{file_path}'.")
54
+ module = importlib.util.module_from_spec(spec)
55
+ sys.modules[module_name] = module
56
+ spec.loader.exec_module(module) # type: ignore
57
+ return module
client/tests/test_client.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+
3
+ import pytest
4
+
5
+ from h2ogpt_client import Client
6
+
7
+ platform.python_version()
8
+
9
+
10
+ @pytest.fixture
11
+ def client(server_url, h2ogpt_key) -> Client:
12
+ return Client(server_url, h2ogpt_key=h2ogpt_key)
13
+
14
+
15
+ def _create_text_completion(client):
16
+ model = client.models.list()[-1]
17
+ return client.text_completion.create(model=model)
18
+
19
+
20
+ @pytest.mark.asyncio
21
+ async def test_text_completion(client):
22
+ text_completion = _create_text_completion(client)
23
+ response = await text_completion.complete(prompt="Hello world")
24
+ assert response
25
+ print(response)
26
+
27
+
28
+ @pytest.mark.asyncio
29
+ async def test_text_completion_stream(client):
30
+ text_completion = _create_text_completion(client)
31
+ response = await text_completion.complete(
32
+ prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
33
+ enable_streaming=True,
34
+ )
35
+ async for token in response:
36
+ assert token
37
+ print(token, end="")
38
+
39
+
40
+ def test_text_completion_sync(client):
41
+ text_completion = _create_text_completion(client)
42
+ response = text_completion.complete_sync(prompt="Hello world")
43
+ assert response
44
+ print(response)
45
+
46
+
47
+ def test_text_completion_sync_stream(client):
48
+ text_completion = _create_text_completion(client)
49
+ response = text_completion.complete_sync(
50
+ prompt="Write a poem about the Amazon rainforest. End it with an emoji.",
51
+ enable_streaming=True,
52
+ )
53
+ for token in response:
54
+ assert token
55
+ print(token, end="")
56
+
57
+
58
+ def _create_chat_completion(client):
59
+ model = client.models.list()[-1]
60
+ return client.chat_completion.create(model=model)
61
+
62
+
63
+ @pytest.mark.asyncio
64
+ async def test_chat_completion(client):
65
+ chat_completion = _create_chat_completion(client)
66
+
67
+ chat1 = await chat_completion.chat(prompt="Hey!")
68
+ assert chat1["user"] == "Hey!"
69
+ assert chat1["gpt"]
70
+
71
+ chat2 = await chat_completion.chat(prompt="What is the capital of USA?")
72
+ assert chat2["user"] == "What is the capital of USA?"
73
+ assert chat2["gpt"]
74
+
75
+ chat3 = await chat_completion.chat(prompt="What is the population in there?")
76
+ assert chat3["user"] == "What is the population in there?"
77
+ assert chat3["gpt"]
78
+
79
+ chat_history = chat_completion.chat_history()
80
+ assert chat_history == [chat1, chat2, chat3]
81
+ print(chat_history)
82
+
83
+
84
+ def test_chat_completion_sync(client):
85
+ chat_completion = _create_chat_completion(client)
86
+
87
+ chat1 = chat_completion.chat_sync(prompt="What is UNESCO?")
88
+ assert chat1["user"] == "What is UNESCO?"
89
+ assert chat1["gpt"]
90
+
91
+ chat2 = chat_completion.chat_sync(prompt="Is it a part of the UN?")
92
+ assert chat2["user"] == "Is it a part of the UN?"
93
+ assert chat2["gpt"]
94
+
95
+ chat3 = chat_completion.chat_sync(prompt="Where is the headquarters?")
96
+ assert chat3["user"] == "Where is the headquarters?"
97
+ assert chat3["gpt"]
98
+
99
+ chat_history = chat_completion.chat_history()
100
+ assert chat_history == [chat1, chat2, chat3]
101
+ print(chat_history)
102
+
103
+
104
+ def test_available_models(client):
105
+ models = client.models.list()
106
+ assert len(models)
107
+ print(models)
108
+
109
+
110
+ def test_server_properties(client, server_url):
111
+ assert client.server.address.startswith(server_url)
112
+ assert client.server.hash
113
+
114
+
115
+ def test_parameters_order(client, eval_func_param_names):
116
+ text_completion = client.text_completion.create()
117
+ assert eval_func_param_names == list(text_completion._parameters.keys())
118
+ chat_completion = client.chat_completion.create()
119
+ assert eval_func_param_names == list(chat_completion._parameters.keys())
120
+
121
+
122
+ @pytest.mark.parametrize("local_server", [True, False])
123
+ def test_readme_example(local_server):
124
+ # self-contained example used for readme,
125
+ # to be copied to client/README.md if changed, setting local_server = True at first
126
+ import asyncio
127
+ import os
128
+
129
+ from h2ogpt_client import Client
130
+
131
+ if local_server:
132
+ client = Client("http://0.0.0.0:7860")
133
+ else:
134
+ h2ogpt_key = os.getenv("H2OGPT_KEY") or os.getenv("H2OGPT_H2OGPT_KEY")
135
+ if h2ogpt_key is None:
136
+ return
137
+ # if you have API key for public instance:
138
+ client = Client("https://gpt.h2o.ai", h2ogpt_key=h2ogpt_key)
139
+
140
+ # Text completion
141
+ text_completion = client.text_completion.create()
142
+ response = asyncio.run(text_completion.complete("Hello world"))
143
+ print("asyncio text completion response: %s" % response)
144
+ # Text completion: synchronous
145
+ response = text_completion.complete_sync("Hello world")
146
+ print("sync text completion response: %s" % response)
147
+
148
+ # Chat completion
149
+ chat_completion = client.chat_completion.create()
150
+ reply = asyncio.run(chat_completion.chat("Hey!"))
151
+ print("asyncio text completion user: %s gpt: %s" % (reply["user"], reply["gpt"]))
152
+ chat_history = chat_completion.chat_history()
153
+ print("chat_history: %s" % chat_history)
154
+ # Chat completion: synchronous
155
+ reply = chat_completion.chat_sync("Hey!")
156
+ print("sync chat completion gpt: %s" % reply["gpt"])
cloud/packer/Jenkinsfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import org.jenkinsci.plugins.pipeline.modeldefinition.Utils
2
+
3
+ properties(
4
+ [
5
+ parameters(
6
+ [
7
+ string(name: 'BRANCH_TAG', defaultValue: 'origin/main'),
8
+ booleanParam(name: 'AZURE', defaultValue: true, description: 'Make Azure Machine Image/Not?'),
9
+ booleanParam(name: 'GCP', defaultValue: true, description: 'Make GCP Image/Not?'),
10
+ string(name: 'H2OGPT_VERSION', defaultValue: "010", description: 'Example: for version 1.10.5 use 1105')
11
+ ]
12
+ )
13
+ ]
14
+ )
15
+
16
+ node('linux && docker') {
17
+ stage('Init') {
18
+ cleanWs()
19
+ currentBuild.displayName = "#${BUILD_NUMBER} - Rel:${H2OGPT_VERSION}"
20
+ checkout scm
21
+ sh('ls -al')
22
+ }
23
+
24
+ stage('Build Images') {
25
+ try {
26
+ docker.image('harbor.h2o.ai/opsh2oai/h2oai-packer-build:2').inside {
27
+ parallel([
28
+ "GCP Ubuntu 20.04": {
29
+ withCredentials([file(credentialsId: 'GCP_MARKETPLACE_SERVICE_ACCOUNT', variable: 'GCP_ACCOUNT_FILE')]) {
30
+ dir('cloud/packer') {
31
+ if (params.GCP) {
32
+ sh("packer build \
33
+ --force \
34
+ -var 'project_id=h2o-gce' \
35
+ -var 'account_file=$GCP_ACCOUNT_FILE' \
36
+ -var 'h2ogpt_version=${H2OGPT_VERSION}' \
37
+ -var 'branch_tag=${BRANCH_TAG}' \
38
+ h2ogpt-gcp.json"
39
+ )
40
+ archiveArtifacts artifacts: '*-image-info.json'
41
+ } else {
42
+ Utils.markStageSkippedForConditional('GCP Ubuntu 20.04')
43
+ }
44
+ }
45
+ }
46
+ },
47
+
48
+ "AZURE Ubuntu 20.04": {
49
+ withCredentials([string(credentialsId: "AZURE_MARKETPLACE_CLIENT_ID", variable: "AZURE_CLIENT_ID"),
50
+ string(credentialsId: "AZURE_MARKETPLACE_CLIENT_SECRET", variable: "AZURE_CLIENT_SECRET"),
51
+ string(credentialsId: "AZURE_MARKETPLACE_SUBSCRIPTION_ID", variable: "AZURE_SUBSCRIPTION_ID"),
52
+ string(credentialsId: "AZURE_MARKETPLACE_TENANT_ID", variable: "AZURE_TENANT_ID")]) {
53
+ dir('cloud/packer') {
54
+ if (params.AZURE) {
55
+ sh("packer build \
56
+ --force \
57
+ -var 'client_id=$AZURE_CLIENT_ID' \
58
+ -var 'client_secret=$AZURE_CLIENT_SECRET' \
59
+ -var 'managed_image_resource_group_name=H2OIMAGES' \
60
+ -var 'subscription_id=$AZURE_SUBSCRIPTION_ID' \
61
+ -var 'tenant_id=$AZURE_TENANT_ID' \
62
+ -var 'h2ogpt_version=${H2OGPT_VERSION}' \
63
+ -var 'branch_tag=${BRANCH_TAG}' \
64
+ h2ogpt-azure.json"
65
+ )
66
+ archiveArtifacts artifacts: '*-image-info.json'
67
+ } else {
68
+ Utils.markStageSkippedForConditional('AZURE Ubuntu 20.04')
69
+ }
70
+ }
71
+ }
72
+ },
73
+
74
+ ])
75
+ }
76
+ } finally {
77
+ cleanWs()
78
+ }
79
+ }
80
+ }
cloud/packer/README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # h2oGPT Packer Templates
2
+
3
+ These scripts help create images in public clouds that can then be submitted to the Azure/GCP Marketplace for commercial use.
4
+
5
+ ### Packer Scripts
6
+ - Azure - `h2ogpt-azure.json`
7
+ - GCP - `h2ogpt-gcp.json`
8
+
9
+ ### Provisioning Scripts
10
+ - `setup_environment.sh`
11
+ - Responsible for setting up CUDA, GCC, Nginx, Python
12
+ - `install_h2ogpt.sh`
13
+ - Responsible for setting up h2oGPT with its dependencies
14
+ - `h2oai-h2ogpt-4096-llama2-13b-chat.sh`
15
+ - Responsible for setting up the default model h2oai-h2ogpt-4096-llama2-13b-chat, served with vLLM on port 80 via Nginx
16
+ - vLLM, h2oGPT and Nginx run as systemd services
17
+ - The model is downloaded at runtime
18
+
19
+ __Jenkins Pipeline__: http://jenkins.h2o.local:8080/job/build-h2ogpt-cloud-images/
20
+
21
+ ### Notes:
22
+ - Since the model is downloaded at runtime after the VM is provisioned, it takes around 5-10 minutes for h2oGPT to start correctly
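+
+ ### Building Locally
+
+ A minimal sketch of a local GCP build, mirroring the invocation the Jenkins pipeline uses (assumes Packer is installed; the service-account path below is a placeholder, and `h2ogpt_version`/`branch_tag` should match your release):
+
+ ```bash
+ cd cloud/packer
+ packer build \
+     --force \
+     -var 'project_id=h2o-gce' \
+     -var 'account_file=/path/to/gcp-service-account.json' \
+     -var 'h2ogpt_version=010' \
+     -var 'branch_tag=main' \
+     h2ogpt-gcp.json
+ ```
+
+ After the resulting VM boots, the services can be inspected with `systemctl status vllm h2ogpt h2ogpt_nginx`.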
cloud/packer/h2oai-h2ogpt-4096-llama2-13b-chat.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
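+ # Enable the Nginx, vLLM and h2oGPT systemd services so they start on boot;
+ # the model weights themselves are downloaded at runtime, on first start.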
3
+ sudo systemctl daemon-reload
4
+ sudo systemctl enable h2ogpt_nginx.service
5
+ sudo systemctl enable vllm.service
6
+ sudo systemctl enable h2ogpt.service
7
+
8
+ cd "$HOME"
9
+ # sudo rm -rf "$HOME"/.cache/huggingface/hub/
10
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y autoremove
11
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y clean
cloud/packer/h2ogpt-azure.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "variables": {
3
+ "client_id": "<AZURE CLIENT ID>",
4
+ "client_secret": "<AZURE CLIENT SECRET>",
5
+ "subscription_id": "92429150-401a-431f-8955-e69c0c119e68",
6
+ "tenant_id": "840229f2-c911-49e6-a73d-5b3a4311835a",
7
+ "managed_image_resource_group_name": "H2OIMAGES",
8
+ "h2ogpt_version": "010",
9
+ "branch_tag": "main",
10
+ "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
11
+ },
12
+ "builders": [
13
+ {
14
+ "type": "azure-arm",
15
+ "client_id": "{{user `client_id`}}",
16
+ "client_secret": "{{user `client_secret`}}",
17
+ "subscription_id": "{{user `subscription_id`}}",
18
+ "tenant_id": "{{user `tenant_id`}}",
19
+ "capture_container_name": "h2ovhdimages",
20
+ "capture_name_prefix": "h2ogpt-{{user `h2ogpt_version`}}",
21
+ "resource_group_name": "{{user `managed_image_resource_group_name`}}",
22
+ "temp_resource_group_name": "Engineering_DevOps_h2oGPT-Ubuntu",
23
+ "storage_account": "h2ovhdimages",
24
+ "os_type": "Linux",
25
+ "image_publisher": "Canonical",
26
+ "image_offer": "0001-com-ubuntu-server-focal",
27
+ "image_sku": "20_04-lts",
28
+ "os_disk_size_gb": 512,
29
+ "azure_tags": {
30
+ "dept": "Engineering",
31
+ "task": "Image deployment",
32
+ "Name": "H2OGPT-CLOUD-IMAGES",
33
+ "Owner": "ops@h2o.ai",
34
+ "Project": "DevOps",
35
+ "Department": "Engineering",
36
+ "Environment": "Dev",
37
+ "Scheduling": "self-managed"
38
+ },
39
+ "location": "East US",
40
+ "vm_size": "Standard_NC24s_v3",
41
+ "ssh_username": "ubuntu"
42
+ }
43
+ ],
44
+ "post-processors": [
45
+ {
46
+ "type": "manifest",
47
+ "output": "azure-ubuntu-image-info.json",
48
+ "strip_path": true,
49
+ "custom_data": {
50
+ "base_image": "AZURE Ubuntu 20.04",
51
+ "h2ogpt_version": "{{user `h2ogpt_version`}}"
52
+ }
53
+ }
54
+ ],
55
+ "provisioners": [
56
+ {
57
+ "type": "shell",
58
+ "script": "setup_environment.sh",
59
+ "pause_before": "10s",
60
+ "pause_after": "10s"
61
+ },
62
+ {
63
+ "type": "shell",
64
+ "inline": ["sudo reboot now"],
65
+ "pause_after": "10s",
66
+ "expect_disconnect": true
67
+ },
68
+ {
69
+ "type": "shell",
70
+ "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
71
+ "script": "install_h2ogpt.sh",
72
+ "pause_after": "10s"
73
+ },
74
+ {
75
+ "type": "shell",
76
+ "inline": [
77
+ "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
78
+ "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
79
+ ],
80
+ "pause_before": "10s"
81
+ },
82
+ {
83
+ "type": "file",
84
+ "source": "./startup-scripts/run_nginx.sh",
85
+ "destination": "/workspace/run_nginx.sh"
86
+ },
87
+ {
88
+ "type": "file",
89
+ "source": "./startup-scripts/run_vllm.sh",
90
+ "destination": "/workspace/run_vllm.sh"
91
+ },
92
+ {
93
+ "type": "file",
94
+ "source": "./startup-scripts/run_h2ogpt.sh",
95
+ "destination": "/workspace/run_h2ogpt.sh"
96
+ },
97
+ {
98
+ "type": "file",
99
+ "source": "./startup-scripts/h2ogpt_nginx.service",
100
+ "destination": "/etc/systemd/system/h2ogpt_nginx.service"
101
+ },
102
+ {
103
+ "type": "file",
104
+ "source": "./startup-scripts/vllm.service",
105
+ "destination": "/etc/systemd/system/vllm.service"
106
+ },
107
+ {
108
+ "type": "file",
109
+ "source": "./startup-scripts/h2ogpt.service",
110
+ "destination": "/etc/systemd/system/h2ogpt.service"
111
+ },
112
+ {
113
+ "type": "file",
114
+ "source": "./startup-scripts/temp.conf",
115
+ "destination": "/workspace/temp.conf"
116
+ },
117
+ {
118
+ "type": "shell",
119
+ "script": "{{user `base_model`}}.sh",
120
+ "pause_after": "10s"
121
+ }
122
+ ]
123
+ }
cloud/packer/h2ogpt-gcp.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "variables": {
3
+ "project_id": "eng-llm",
4
+ "account_file": "<NAME OF GCP CREDENTIALS JSON FILE>",
5
+ "h2ogpt_version": "010",
6
+ "branch_tag": "main",
7
+ "base_model": "h2oai-h2ogpt-4096-llama2-13b-chat"
8
+ },
9
+ "builders": [
10
+ {
11
+ "type": "googlecompute",
12
+ "project_id": "{{user `project_id`}}",
13
+ "account_file": "{{user `account_file`}}",
14
+ "machine_type": "n1-standard-8",
15
+ "on_host_maintenance": "TERMINATE",
16
+ "accelerator_type": "projects/{{user `project_id`}}/zones/us-west1-b/acceleratorTypes/nvidia-tesla-t4",
17
+ "accelerator_count": "4",
18
+ "source_image_family": "ubuntu-2004-lts",
19
+ "zone": "us-west1-b",
20
+ "image_description": "h2ogpt using Packer",
21
+ "image_name": "h2ogpt-{{user `h2ogpt_version`}}",
22
+ "disk_size": 512,
23
+ "disk_type": "pd-ssd",
24
+ "ssh_username": "ubuntu",
25
+ "tags": ["h2ogpt"]
26
+ }
27
+ ],
28
+ "post-processors": [
29
+ {
30
+ "type": "manifest",
31
+ "output": "gcp-image-info.json",
32
+ "strip_path": true,
33
+ "custom_data": {
34
+ "base_image": "GCP Ubuntu 20.04",
35
+ "h2ogpt_version": "{{user `h2ogpt_version`}}"
36
+ }
37
+ }
38
+ ],
39
+ "provisioners": [
40
+ {
41
+ "type": "shell",
42
+ "script": "setup_environment.sh",
43
+ "pause_before": "10s",
44
+ "pause_after": "10s"
45
+ },
46
+ {
47
+ "type": "shell",
48
+ "inline": ["sudo reboot now"],
49
+ "pause_after": "10s",
50
+ "expect_disconnect": true
51
+ },
52
+ {
53
+ "type": "shell",
54
+ "environment_vars": ["BRANCH_TAG={{user `branch_tag`}}"],
55
+ "script": "install_h2ogpt.sh",
56
+ "pause_after": "10s"
57
+ },
58
+ {
59
+ "type": "shell",
60
+ "inline": [
61
+ "sudo chown -R ubuntu:ubuntu /etc/nginx/conf.d",
62
+ "sudo chown -R ubuntu:ubuntu /etc/systemd/system/"
63
+ ],
64
+ "pause_before": "10s"
65
+ },
66
+ {
67
+ "type": "file",
68
+ "source": "./startup-scripts/run_nginx.sh",
69
+ "destination": "/workspace/run_nginx.sh"
70
+ },
71
+ {
72
+ "type": "file",
73
+ "source": "./startup-scripts/run_vllm.sh",
74
+ "destination": "/workspace/run_vllm.sh"
75
+ },
76
+ {
77
+ "type": "file",
78
+ "source": "./startup-scripts/run_h2ogpt.sh",
79
+ "destination": "/workspace/run_h2ogpt.sh"
80
+ },
81
+ {
82
+ "type": "file",
83
+ "source": "./startup-scripts/h2ogpt_nginx.service",
84
+ "destination": "/etc/systemd/system/h2ogpt_nginx.service"
85
+ },
86
+ {
87
+ "type": "file",
88
+ "source": "./startup-scripts/vllm.service",
89
+ "destination": "/etc/systemd/system/vllm.service"
90
+ },
91
+ {
92
+ "type": "file",
93
+ "source": "./startup-scripts/h2ogpt.service",
94
+ "destination": "/etc/systemd/system/h2ogpt.service"
95
+ },
96
+ {
97
+ "type": "file",
98
+ "source": "./startup-scripts/temp.conf",
99
+ "destination": "/workspace/temp.conf"
100
+ },
101
+ {
102
+ "type": "shell",
103
+ "script": "{{user `base_model`}}.sh",
104
+ "pause_after": "10s"
105
+ }
106
+ ]
107
+ }
cloud/packer/install_h2ogpt.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
3
+ export PATH=$PATH:/home/ubuntu/.local/bin
4
+ sudo mkdir -p /workspace && cd /workspace
5
+ sudo chmod a+rwx .
6
+
7
+ git config --global --add safe.directory /workspace
8
+ git config --global advice.detachedHead false
9
+ git clone https://github.com/h2oai/h2ogpt.git .
10
+
11
+ if [ -z "$BRANCH_TAG" ]; then
12
+ echo "BRANCH_TAG environment variable is not set."
13
+ exit 1
14
+ fi
15
+
16
+ git checkout "$BRANCH_TAG"
17
+
18
+ ls -la
19
+ sudo ./docker_build_script_ubuntu.sh
cloud/packer/setup_environment.sh ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
3
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
4
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y --no-install-recommends install \
5
+ git \
6
+ software-properties-common \
7
+ pandoc \
8
+ curl \
9
+ apt-utils \
10
+ make \
11
+ build-essential \
12
+ wget \
13
+ gnupg2 \
14
+ ca-certificates \
15
+ lsb-release \
16
+ ubuntu-keyring
17
+
18
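+ # Add the official nginx.org APT repository: import and verify its signing key,
+ # register the source list with pinning, then install Nginx from it.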
+ curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor | sudo tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null
19
+ gpg --dry-run --quiet --no-keyring --import --import-options import-show /usr/share/keyrings/nginx-archive-keyring.gpg
20
+ echo "deb [signed-by=/usr/share/keyrings/nginx-archive-keyring.gpg] http://nginx.org/packages/ubuntu `lsb_release -cs` nginx" sudo tee /etc/apt/sources.list.d/nginx.list
21
+ echo -e "Package: *\nPin: origin nginx.org\nPin: release o=nginx\nPin-Priority: 900\n" sudo tee /etc/apt/preferences.d/99nginx
22
+
23
+ sudo DEBIAN_FRONTEND=noninteractive apt -y update
24
+ sudo DEBIAN_FRONTEND=noninteractive apt -y install nginx
25
+
26
+ MAX_GCC_VERSION=11
27
+ sudo DEBIAN_FRONTEND=noninteractive add-apt-repository -y ppa:ubuntu-toolchain-r/test
28
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y install gcc-$MAX_GCC_VERSION g++-$MAX_GCC_VERSION
29
+
30
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-$MAX_GCC_VERSION 100
31
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-$MAX_GCC_VERSION 100
32
+ sudo update-alternatives --set gcc /usr/bin/gcc-$MAX_GCC_VERSION
33
+ sudo update-alternatives --set g++ /usr/bin/g++-$MAX_GCC_VERSION
34
+
35
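+ # Install CUDA 11.8 from NVIDIA's local repository package for Ubuntu 20.04.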
+ wget --quiet https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
36
+ sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
37
+ wget --quiet https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
38
+ sudo dpkg -i cuda-repo-ubuntu2004-11-8-local_11.8.0-520.61.05-1_amd64.deb
39
+ sudo cp /var/cuda-repo-ubuntu2004-11-8-local/cuda-*-keyring.gpg /usr/share/keyrings/
40
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y update
41
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -y install cuda
42
+ sudo rm -rf *.deb
43
+
44
+ sudo echo "export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.8/lib64/" >> ~/.bashrc
45
+ sudo echo "export CUDA_HOME=/usr/local/cuda-11.8" >> ~/.bashrc
46
+ sudo echo "export PATH=$PATH:/h2ogpt_conda/bin:/usr/local/cuda-11.8/bin/" >> ~/.bashrc
cloud/packer/startup-scripts/h2ogpt.service ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Unit]
2
+ Description=h2oGPT Server
3
+ After=network.target
4
+
5
+ [Service]
6
+ Type=simple
7
+ User=ubuntu
8
+ WorkingDirectory=/workspace
9
+ ExecStart=/usr/bin/bash /workspace/run_h2ogpt.sh
10
+
11
+ [Install]
12
+ WantedBy=multi-user.target
cloud/packer/startup-scripts/h2ogpt_nginx.service ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Unit]
2
+ Description=h2oGPT Nginx Server
3
+ After=network.target
4
+
5
+ [Service]
6
+ Type=simple
7
+ User=ubuntu
8
+ WorkingDirectory=/workspace
9
+ ExecStart=/usr/bin/bash /workspace/run_nginx.sh
10
+
11
+ [Install]
12
+ WantedBy=multi-user.target
cloud/packer/startup-scripts/run_h2ogpt.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
3
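+ # Poll the vLLM OpenAI-compatible endpoint until it answers a test completion
+ # with HTTP 200, then launch h2oGPT pointed at it.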
+ while true; do
4
+ http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
5
+ -H "Content-Type: application/json" \
6
+ -d '{
7
+ "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
8
+ "prompt": "San Francisco is a",
9
+ "max_tokens": 7,
10
+ "temperature": 0
11
+ }')
12
+
13
+ if [ "$http_code" -eq 200 ]; then
14
+ echo "Received HTTP 200 status code. Starting h2ogpt service"
15
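+ # h2oGPT gets the upper half of the GPUs (indices count/2 .. count-1);
+ # vLLM occupies the lower half (see run_vllm.sh).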
+ CUDA_VISIBLE_DEVICES=$(seq -s, $(($(nvidia-smi -L | wc -l) / 2)) $(($(nvidia-smi -L | wc -l) - 1))) /h2ogpt_conda/bin/python3.10 \
16
+ /workspace/generate.py \
17
+ --inference_server="vllm:0.0.0.0:5000" \
18
+ --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \
19
+ --langchain_mode=UserData
20
+ break
21
+ else
22
+ echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
23
+ sleep 5
24
+ fi
25
+ done
26
+
cloud/packer/startup-scripts/run_nginx.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
3
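+ # Wait until the vLLM endpoint serves completions, then substitute this VM's
+ # public IP into the Nginx config and restart Nginx.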
+ while true; do
4
+ http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
5
+ -H "Content-Type: application/json" \
6
+ -d '{
7
+ "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
8
+ "prompt": "San Francisco is a",
9
+ "max_tokens": 7,
10
+ "temperature": 0
11
+ }')
12
+
13
+ if [ "$http_code" -eq 200 ]; then
14
+ echo "Received HTTP 200 status code. Restarting Nginx for h2oGPT"
15
+ ip=$(dig +short myip.opendns.com @resolver1.opendns.com)
16
+ sed "s/<|_SUBST_PUBLIC_IP|>;/$ip;/g" /workspace/temp.conf > /etc/nginx/conf.d/h2ogpt.conf
17
+ sudo systemctl restart nginx.service
18
+ break
19
+ else
20
+ echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
21
+ sleep 5
22
+ fi
23
+ done
cloud/packer/startup-scripts/run_vllm.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash -e
2
+
3
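+ # Use the lower half of the visible GPUs for vLLM tensor parallelism (at least 1);
+ # the upper half is reserved for h2oGPT (see run_h2ogpt.sh).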
+ tps=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l | awk '{if ($1 > 1) print int($1/2); else print 1}')
4
+ NCCL_IGNORE_DISABLED_P2P=1 CUDA_VISIBLE_DEVICES=$(seq -s, 0 $(($(nvidia-smi -L | wc -l) > 1 ? $(nvidia-smi -L | wc -l) / 2 - 1 : 0))) \
5
+ /h2ogpt_conda/vllm_env/bin/python3.10 -m vllm.entrypoints.openai.api_server \
6
+ --port=5000 \
7
+ --host=0.0.0.0 \
8
+ --model h2oai/h2ogpt-4096-llama2-13b-chat \
9
+ --tokenizer=hf-internal-testing/llama-tokenizer \
10
+ --tensor-parallel-size=$tps --seed 1234
cloud/packer/startup-scripts/temp.conf ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ server {
2
+ listen 80;
3
+ listen [::]:80;
4
+ server_name <|_SUBST_PUBLIC_IP|>; # Change this to your domain name
5
+
6
+ location / { # Change this if you'd like to serve your Gradio app on a different path
7
+ proxy_pass http://0.0.0.0:7860/; # Change this if your Gradio app will be running on a different port
8
+ proxy_redirect off;
9
+ proxy_http_version 1.1;
10
+ proxy_set_header Upgrade $http_upgrade;
11
+ proxy_set_header Connection "upgrade";
12
+ proxy_set_header Host $host;
13
+ }
14
+ }
cloud/packer/startup-scripts/vllm.service ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [Unit]
2
+ Description=vLLM Server
3
+ After=network.target
4
+
5
+ [Service]
6
+ Type=simple
7
+ User=ubuntu
8
+ WorkingDirectory=/workspace
9
+ ExecStart=/usr/bin/bash /workspace/run_vllm.sh
10
+
11
+ [Install]
12
+ WantedBy=multi-user.target