File size: 2,087 Bytes
efe0924
24b4b28
efe0924
 
8d30b62
24b4b28
efe0924
 
 
24b4b28
efe0924
 
 
 
 
 
 
09063a8
efe0924
 
 
b43c18e
efe0924
 
65121b5
efe0924
5cf48e0
 
 
6a0a9f7
 
5cf48e0
efe0924
 
 
 
 
8d30b62
efe0924
 
 
 
 
 
 
 
 
8d30b62
 
efe0924
 
 
8d30b62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# for generate (gradio server) and finetune
datasets==2.12.0
sentencepiece==0.1.97
accelerate==0.18.0
gradio==3.31.0
huggingface_hub==0.14.1
appdirs==1.4.4
fire==0.5.0
docutils==0.19
torch==2.0.1
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1
scikit-learn==1.2.2
alt-profanity-check==1.2.2
better-profanity==0.6.1
numpy==1.24.2
pandas==2.0.0
matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.38.1
git+https://github.com/huggingface/peft.git@098962fa6515f2e4fe83a757f5995d3ffbb1c373
transformers==4.28.1
tokenizers==0.13.3
APScheduler==3.10.1

# optional for generate
pynvml==11.5.0
psutil==5.9.4
boto3==1.26.101
botocore==1.29.101

# optional for finetune
tensorboard==2.12.1
neptune==1.1.1

# for gradio client
gradio_client==0.2.5
beautifulsoup4==4.12.2
markdown==3.4.1

# data and testing
pytest==7.2.2
pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
pandoc==2.3
#pypandoc==1.11
pypandoc_binary==1.11
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0
# To install with constraints
# grep -v '#\|peft' requirements.txt > req_constraints.txt ; pip install -r requirements_optional_langchain.txt -c req_constraints.txt

# optional for chat with PDF
langchain==0.0.178
pypdf==3.8.1
tiktoken==0.3.3
# avoid textract: it requires an old version of six
#textract==1.6.5
# choose one (leave the other commented out):
#faiss-cpu
faiss-gpu==1.7.2

# for HF embeddings
sentence_transformers==2.2.2
# for OpenAI embeddings (requires key)
openai==0.27.6

# local vector db
chromadb==0.3.23
# server vector db
#pymilvus==2.2.8

# weak URL support, for when opencv etc. cannot be installed. If you uncomment this one, then comment out unstructured[local-inference]==0.6.6
# unstructured==0.6.6

# strong support for images
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libreoffice
unstructured[local-inference]==0.6.6
#pdf2image==1.16.3
#pytesseract==0.3.10
pillow

pdfminer.six==20221105
urllib3==1.26.6
requests_file==1.5.1

#pdf2image==1.16.3
#pytesseract==0.3.10
tabulate==0.9.0
# FYI: pandoc is already part of requirements.txt

jq==1.4.1

# to check licenses
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses==4.3.0