# Pinned Python dependencies, one requirement per line (pip requirements format).
# Lines starting with '#' are comments; commented-out pins are optional
# alternatives that are deliberately left disabled.

# for generate (gradio server) and finetune
datasets==2.12.0
sentencepiece==0.1.97
gradio==3.34.0
huggingface_hub==0.14.1
appdirs==1.4.4
fire==0.5.0
docutils==0.19
torch==2.0.1
evaluate==0.4.0
rouge_score==0.1.2
sacrebleu==2.3.1
scikit-learn==1.2.2
alt-profanity-check==1.2.2
better-profanity==0.6.1
numpy==1.24.2
pandas==2.0.0
matplotlib==3.7.1
loralib==0.1.1
bitsandbytes==0.39.0
accelerate==0.19.0
# peft is installed from a pinned git commit rather than a PyPI release
git+https://github.com/huggingface/peft.git@3714aa2fff158fdfa637b2b65952580801d890b2
transformers==4.28.1
tokenizers==0.13.3
APScheduler==3.10.1

# optional for generate
pynvml==11.5.0
psutil==5.9.4
boto3==1.26.101
botocore==1.29.101

# optional for finetune
tensorboard==2.12.1
neptune==1.1.1

# for gradio client
gradio_client==0.2.6
beautifulsoup4==4.12.2
markdown==3.4.1

# data and testing
pytest==7.2.2
pytest-xdist==3.2.1
nltk==3.8.1
textstat==0.7.3
pandoc==2.3
#pypandoc==1.11
pypandoc_binary==1.11
openpyxl==3.1.2
lm_dataformat==0.0.20
bioc==2.0

# falcon
einops==0.6.1
instructorembedding==1.0.1

# for gpt4all .env file, but avoid worrying about imports
python-dotenv==1.0.0

# optional for chat with PDF
langchain==0.0.193
pypdf==3.8.1
tiktoken==0.3.3
# avoid textract, requires old six
#textract==1.6.5

# for HF embeddings
sentence_transformers==2.2.2
# for OpenAI embeddings (requires key)
openai==0.27.6

# local vector db
chromadb==0.3.25
# server vector db
#pymilvus==2.2.8

# weak url support, for when opencv etc. cannot be installed.
# If you uncomment this one, comment out unstructured[local-inference]==0.6.6 below.
# unstructured==0.6.6

# strong support for images
# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libreoffice
unstructured[local-inference]==0.6.6
#pdf2image==1.16.3
#pytesseract==0.3.10
pillow

pdfminer.six==20221105
urllib3==1.26.6
requests_file==1.5.1

tabulate==0.9.0
# FYI pandoc is already pinned above (see "data and testing")

# JSONLoader, but makes some trouble for some users
# jq==1.4.1

# to check licenses
# Run: pip-licenses|grep -v 'BSD\|Apache\|MIT'
pip-licenses==4.3.0

# weaviate vector db
weaviate-client==3.19.2

faiss-gpu==1.7.2

arxiv==1.4.7

# packages with copyleft licenses
pymupdf==1.22.3  # AGPL license
# extract-msg==0.41.1  # GPL3