File size: 11,231 Bytes
b585c7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
#!/bin/bash
set -o pipefail
set -ex

shopt -s expand_aliases
if ! test -f /usr/bin/sudo; then
  echo "No sudo"
  alias sudo=' '
fi

#* Optional: For document Q/A and use of DocTR.  Install before other pips to avoid long conflict checks.
conda install weasyprint pygobject -c conda-forge -y
#   Avoids library mismatch.
#* Install primary dependencies
# fix any bad env
pip uninstall -y pandoc pypandoc pypandoc-binary flash-attn
# broad support, but no training-time or data creation dependencies

  pip install -r requirements.txt
#* Optional: Install document question-answer dependencies:
# May be required for jq package:
sudo apt-get -y install autoconf libtool
# Required for Doc Q/A: LangChain:
pip install -r reqs_optional/requirements_optional_langchain.txt
# Required for CPU: LLaMa/GPT4All:
pip install -r reqs_optional/requirements_optional_gpt4all.txt
# Optional: PyMuPDF/ArXiv:
#   Note!! that pymupdf is AGPL, requiring any source code be made available, but it's like GPL and too strong a constraint for general commercial use.
if [ "${GPLOK}" -eq "1" ]
then
    pip install -r reqs_optional/requirements_optional_langchain.gpllike.txt
fi
# Optional: FAISS
pip install -r reqs_optional/requirements_optional_faiss.txt
# Optional: Selenium/PlayWright:
pip install -r reqs_optional/requirements_optional_langchain.urls.txt

# Optional: support docx, pptx, ArXiv, etc. required by some python packages
sudo apt-get install -y libmagic-dev poppler-utils tesseract-ocr libtesseract-dev libreoffice

# Optional: For DocTR
pip install -r reqs_optional/requirements_optional_doctr.txt
# For DocTR: go back to older onnx so Tesseract OCR still works
pip install onnxruntime==1.15.0
# GPU only:
pip install onnxruntime-gpu==1.15.0

# Optional: for supporting unstructured package
for i in 1 2 3 4; do python -m nltk.downloader all && break || sleep 1; done  # retry as frequently fails with github downloading issues
# Optional: Required for PlayWright
playwright install --with-deps
# Audio transcription from Youtube videos and local mp3 files:
pip install pydub==0.25.1 librosa==0.10.1 ffmpeg==1.4 yt_dlp==2023.10.13 wavio==0.0.8
# Audio speed-up and slowdown (best quality), if not installed can only speed-up with lower quality
sudo apt-get install -y rubberband-cli
pip install pyrubberband==0.3.0
# https://stackoverflow.com/questions/75813603/python-working-with-sound-librosa-and-pyrubberband-conflict
pip uninstall -y pysoundfile soundfile
pip install soundfile==0.12.1
# Optional: Only for testing for now
pip install playsound==1.3.0
# STT from microphone (may not be required if ffmpeg installed above)
sudo apt-get install ffmpeg
# for any TTS:
pip install torchaudio soundfile==0.12.1
# GPU Only: for Coqui XTTS (ensure CUDA_HOME set and consistent with added postfix for extra-index):
# relaxed versions to avoid conflicts
pip install TTS deepspeed noisereduce emoji ffmpeg-python==0.2.0 trainer pysbd coqpit
# for Coqui XTTS language helpers (specific versions probably not required)
pip install cutlet==0.3.0 langid==1.1.6 g2pkk==0.1.2 jamo==0.4.1 gruut[de,es,fr]==2.2.3 jieba==0.42.1
# For faster whisper:
pip install git+https://github.com/SYSTRAN/faster-whisper.git
# needed for librosa/soundfile to work, but violates TTS, but that's probably just too strict as we have seen before)
pip install numpy==1.23.0 --no-deps --upgrade
# TTS or other deps load old librosa, fix:
pip install librosa==0.10.1 --no-deps --upgrade
#* STT and TTS Notes:
#  * STT: Ensure microphone is on and in browser go to http://localhost:7860 instead of http://0.0.0.0:7860 for microphone to be possible to allow in browser.
#  * TTS: For XTT models, ensure `CUDA_HOME` is set correctly, because deepspeed compiles at runtime using torch and nvcc.  Those must match CUDA version.  E.g. if used `--extra-index https://download.pytorch.org/whl/cu118`, then must have ENV `CUDA_HOME=/usr/local/cuda-11.7` or ENV from conda must be that version.  Since conda only has up to cuda 11.7 for dev toolkit, but H100+ need cuda 11.8, for those cases one should download the toolkit from NVIDIA.

# Vision/Image packages
pip install fiftyone
pip install pytube
pip install diffusers==0.24.0

#* HNSW issue:
#    In some cases old chroma migration package will install old hnswlib and that may cause issues when making a database, then do:
pip uninstall -y hnswlib chroma-hnswlib
# restore correct version
pip install chroma-hnswlib==0.7.3 --upgrade
#* Selenium needs to have chrome installed, e.g. on Ubuntu:
sudo apt install -y unzip xvfb libxi6 libgconf-2-4 libu2f-udev
sudo apt install -y default-jdk
if [ 1 -eq 0 ]; then
    sudo bash -c 'curl -sS -o - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add'
    sudo bash -c "echo 'deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main' >> /etc/apt/sources.list.d/google-chrome.list"
    sudo apt -y update
    sudo apt -y install google-chrome-stable  # e.g. Google Chrome 114.0.5735.198
fi
wget http://dl.google.com/linux/chrome/deb/pool/main/g/google-chrome-stable/google-chrome-stable_114.0.5735.198-1_amd64.deb
sudo dpkg -i google-chrome-stable_114.0.5735.198-1_amd64.deb
sudo google-chrome --version  # e.g. Google Chrome 114.0.5735.198
# visit https://chromedriver.chromium.org/downloads and download matching version
# E.g.
sudo rm -rf chromedriver_linux64.zip chromedriver LICENSE.chromedriver
sudo wget https://chromedriver.storage.googleapis.com/114.0.5735.90/chromedriver_linux64.zip
sudo unzip chromedriver_linux64.zip
sudo mv chromedriver /usr/bin/chromedriver
sudo chown root:root /usr/bin/chromedriver
sudo chmod +x /usr/bin/chromedriver
#* GPU Optional: For AutoGPTQ support on x86_64 linux
pip uninstall -y auto-gptq ; pip install auto-gptq==0.6.0 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
# in-transformers support of AutoGPTQ, requires also auto-gptq above to be installed since used internally by transformers/optimum
pip install optimum==1.16.1
#    See [AutoGPTQ](README_GPU.md#autogptq) about running AutoGPT models.
#* GPU Optional: For AutoAWQ support on x86_64 linux
pip uninstall -y autoawq ; pip install https://github.com/casper-hansen/AutoAWQ/releases/download/v0.1.8/autoawq-0.1.8+cu118-cp310-cp310-linux_x86_64.whl
# fix version since don't need lm-eval to have its version of 1.5.0
pip install sacrebleu==2.3.1 --upgrade
#    If this has issues, you need to build:
if [ 1 -eq 0 ]
then
    pip uninstall -y autoawq
    git clone https://github.com/casper-hansen/AutoAWQ
    cd AutoAWQ
    pip install .
fi
#* GPU Optional: For exllama support on x86_64 linux
pip uninstall -y exllama ; pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu118-cp310-cp310-linux_x86_64.whl --no-cache-dir
#    See [exllama](README_GPU.md#exllama) about running exllama models.

#  * If any issues with llama_cpp_python, then must compile llama-cpp-python with CUDA support:
if [ 1 -eq 0 ]
then
    pip uninstall -y llama-cpp-python llama-cpp-python-cuda
    export LLAMA_CUBLAS=1
    export CMAKE_ARGS=-DLLAMA_CUBLAS=on
    export FORCE_CMAKE=1
    CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python==0.2.26 --no-cache-dir --verbose
fi
#  * By default, we set `n_gpu_layers` to large value, so llama.cpp offloads all layers for maximum GPU performance.  You can control this by passing `--llamacpp_dict="{'n_gpu_layers':20}"` for value 20, or setting in UI.  For highest performance, offload *all* layers.
#    That is, one gets maximum performance if one sees in startup of h2oGPT all layers offloaded:
#      ```text
#    llama_model_load_internal: offloaded 35/35 layers to GPU
#    ```
#  but this requires sufficient GPU memory.  Reduce if you have low memory GPU, say 15.
#  * Pass to `generate.py` the option `--max_seq_len=2048` or some other number if you want model have controlled smaller context, else default (relatively large) value is used that will be slower on CPU.
#  * For LLaMa2, can set `max_tokens` to a larger value for longer output.
#  * If one sees `/usr/bin/nvcc` mentioned in errors, that file needs to be removed as would likely conflict with version installed for conda.
#  * Note that once `llama-cpp-python` is compiled to support CUDA, it no longer works for CPU mode, so one would have to reinstall it without the above options to recovers CPU mode or have a separate h2oGPT env for CPU mode.

#* GPU Optional: Support amazon/MistralLite with flash attention 2
if [[ -v CUDA_HOME ]];
then
    pip install flash-attn==2.3.4 --no-build-isolation
fi
#* Control Core Count for chroma < 0.4 using chromamigdb package:
#    * Duckdb used by Chroma < 0.4 uses DuckDB 0.8.1 that has no control over number of threads per database, `import duckdb` leads to all virtual cores as threads and each db consumes another number of threads equal to virtual cores.  To prevent this, one can rebuild duckdb using [this modification](https://github.com/h2oai/duckdb/commit/dcd8c1ffc53dd020623630efb99ba6a3a4cbc5ad) or one can try to use the prebuild wheel for x86_64 built on Ubuntu 20.
pip uninstall -y pyduckdb duckdb
pip install https://h2o-release.s3.amazonaws.com/h2ogpt/duckdb-0.8.2.dev4025%2Bg9698e9e6a8.d20230907-cp310-cp310-linux_x86_64.whl --no-cache-dir --force-reinstall --no-deps
#* SERP for search:
pip install -r reqs_optional/requirements_optional_agents.txt
#  For more info see [SERP Docs](README_SerpAPI.md).
#* Deal with not-thread-safe things in LangChain:
pwd0=`pwd`
sp=`python3.10 -c 'import site; print(site.getsitepackages()[0])'`
cd $sp
sed -i  's/with HiddenPrints():/if True:/g' langchain/utilities/serpapi.py
#sed -i 's/"progress": Status.PROGRESS,/"progress": Status.PROGRESS,\n            "heartbeat": Status.PROGRESS,/g' gradio_client/utils.py
#sed -i 's/async for line in response.aiter_text():/async for line in response.aiter_lines():\n                if len(line) == 0:\n                    continue\n                if line == """{"detail":"Not Found"}""":\n                    continue/g' gradio_client/utils.py
cd $pwd0

# fix pytube to avoid errors for restricted content
sp=`python3.10 -c 'import site; print(site.getsitepackages()[0])'`
sed -i "s/client='ANDROID_MUSIC'/client='ANDROID'/g" $sp/pytube/innertube.py

# fix asyncio same way websockets was fixed, else keep hitting errors in async calls
# https://github.com/python-websockets/websockets/commit/f9fd2cebcd42633ed917cd64e805bea17879c2d7
sp=`python3.10 -c 'import site; print(site.getsitepackages()[0])'`
sed -i "s/except OSError:/except (OSError, RuntimeError):/g" $sp/anyio/_backends/_asyncio.py

#* PDF View support
# only if using gradio4
#pip install https://h2o-release.s3.amazonaws.com/h2ogpt/gradio_pdf-0.0.3-py3-none-any.whl


### Compile Install Issues
#  * `/usr/local/cuda/include/crt/host_config.h:132:2: error: #error -- unsupported GNU version! gcc versions later than 11 are not supported!`
#    * gcc > 11 is not currently supported by nvcc.  Install GCC with a maximum version:
if [ 1 -eq 0 ]
then
    MAX_GCC_VERSION=11
    sudo apt install gcc-$MAX_GCC_VERSION g++-$MAX_GCC_VERSION
    sudo update-alternatives --config gcc
    # pick version 11
    sudo update-alternatives --config g++
    # pick version 11
fi