Research-chatbot

Runtime error

App Files Files Community

Research-chatbot / requirements.txt

pseudotensor

Update with h2oGPT hash 13a8343d2a96885985bda8c4480bbb23cf55bb9b

eeb7ca1 over 1 year ago

raw

history blame

3.27 kB

	# for generate (gradio server) and finetune
	datasets==2.12.0
	sentencepiece==0.1.97
	gradio==3.31.0
	huggingface_hub==0.14.1
	appdirs==1.4.4
	fire==0.5.0
	docutils==0.19
	torch==2.0.1
	evaluate==0.4.0
	rouge_score==0.1.2
	sacrebleu==2.3.1
	scikit-learn==1.2.2
	alt-profanity-check==1.2.2
	better-profanity==0.6.1
	numpy==1.24.2
	pandas==2.0.0
	matplotlib==3.7.1
	loralib==0.1.1
	bitsandbytes==0.39.0
	accelerate==0.19.0
	git+https://github.com/huggingface/peft.git@3714aa2fff158fdfa637b2b65952580801d890b2
	transformers==4.28.1
	tokenizers==0.13.3
	APScheduler==3.10.1

	# optional for generate
	pynvml==11.5.0
	psutil==5.9.4
	boto3==1.26.101
	botocore==1.29.101

	# optional for finetune
	tensorboard==2.12.1
	neptune==1.1.1

	# for gradio client
	gradio_client==0.2.5
	beautifulsoup4==4.12.2
	markdown==3.4.1

	# data and testing
	pytest==7.2.2
	pytest-xdist==3.2.1
	nltk==3.8.1
	textstat==0.7.3
	pandoc==2.3
	#pypandoc==1.11
	pypandoc_binary==1.11
	openpyxl==3.1.2
	lm_dataformat==0.0.20
	bioc==2.0

	# falcon
	einops==0.6.1
	instructorembedding==1.0.1

	# for gpt4all .env file, but avoid worrying about imports
	python-dotenv==1.0.0

	# optional for chat with PDF
	langchain==0.0.193
	pypdf==3.8.1
	tiktoken==0.3.3
	# avoid textract, requires old six
	#textract==1.6.5

	# for HF embeddings
	sentence_transformers==2.2.2
	# for OpenAI embeddings (requires key)
	openai==0.27.6

	# local vector db
	chromadb==0.3.25
	# server vector db
	#pymilvus==2.2.8

	# weak URL support, for when opencv etc. cannot be installed. If you uncomment this one, then comment out unstructured[local-inference]==0.6.6
	# unstructured==0.6.6

	# strong support for images
	# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libreoffice
	unstructured[local-inference]==0.6.6
	#pdf2image==1.16.3
	#pytesseract==0.3.10
	pillow

	pdfminer.six==20221105
	urllib3==1.26.6
	requests_file==1.5.1

	#pdf2image==1.16.3
	#pytesseract==0.3.10
	tabulate==0.9.0
	# FYI pandoc already part of requirements.txt

	# JSONLoader, but makes some trouble for some users
	# jq==1.4.1

	# to check licenses
	# Run: pip-licenses | grep -v 'BSD\|Apache\|MIT'
	pip-licenses==4.3.0

	# weaviate vector db
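	weaviate-client==3.19.2

	# NOTE: the section below repeats the "optional for chat with PDF" pins listed earlier in this file (same versions).
	# optional for chat with PDF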
	langchain==0.0.193
	pypdf==3.8.1
	tiktoken==0.3.3
	# avoid textract, requires old six
	#textract==1.6.5

	# for HF embeddings
	sentence_transformers==2.2.2
	# for OpenAI embeddings (requires key)
	openai==0.27.6

	# local vector db
	chromadb==0.3.25
	# server vector db
	#pymilvus==2.2.8

	# weak URL support, for when opencv etc. cannot be installed. If you uncomment this one, then comment out unstructured[local-inference]==0.6.6
	# unstructured==0.6.6

	# strong support for images
	# Requires on Ubuntu: sudo apt-get install libmagic-dev poppler-utils tesseract-ocr libreoffice
	unstructured[local-inference]==0.6.6
	#pdf2image==1.16.3
	#pytesseract==0.3.10
	pillow

	pdfminer.six==20221105
	urllib3==1.26.6
	requests_file==1.5.1

	#pdf2image==1.16.3
	#pytesseract==0.3.10
	tabulate==0.9.0
	# FYI pandoc already part of requirements.txt

	# JSONLoader, but makes some trouble for some users
	# jq==1.4.1

	# to check licenses
	# Run: pip-licenses | grep -v 'BSD\|Apache\|MIT'
	pip-licenses==4.3.0

	# weaviate vector db
	weaviate-client==3.19.2

	faiss-gpu==1.7.2
	gpt4all==0.2.3
	llama-cpp-python==0.1.55
	arxiv==1.4.7
	pymupdf==1.22.3 # AGPL license
	# extract-msg==0.41.1 # GPL3