fakezeta committed
Commit
fbb1222
1 Parent(s): cb7bd30

Switching back to llama.cpp due to memory limits on the Hugging Face free tier

Files changed (2)
  1. query_data.py +18 -18
  2. requirements.txt +2 -3
query_data.py CHANGED
@@ -1,26 +1,26 @@
-#from langchain.llms import LlamaCpp
-from langchain import HuggingFacePipeline
-from langchain.chains import ConversationalRetrievalChain
-#from huggingface_hub import hf_hub_download
+from langchain.llms import LlamaCpp
+#from langchain import HuggingFacePipeline
+#from langchain.chains import ConversationalRetrievalChain
+from huggingface_hub import hf_hub_download
 
-#import psutil
+import psutil
 import os
 
-offload_path = "offload"
+#offload_path = "offload"
 
 def get_chain(vectorstore):
-    #if not os.path.exists("ggml-vic7b-q5_1.bin"):
-    #    hf_hub_download(repo_id="eachadea/ggml-vicuna-7b-1.1", filename="ggml-vic7b-q5_1.bin", local_dir=".")
-    #llm = LlamaCpp(model_path="ggml-vic7b-q5_1.bin", n_ctx=2048, n_threads=psutil.cpu_count(logical=False))
-    if not os.path.exists(offload_path):
-        os.makedirs(offload_path)
-    llm = HuggingFacePipeline.from_model_id(model_id="lmsys/fastchat-t5-3b-v1.0",
-                                            task="text2text-generation",
-                                            model_kwargs={"max_length":512,
-                                                          "device_map":"auto",
-                                                          "offload_folder":"offload"
-                                                          }
-                                            )
+    if not os.path.exists("ggml-vic7b-q5_1.bin"):
+        hf_hub_download(repo_id="eachadea/ggml-vicuna-7b-1.1", filename="ggml-vic7b-q5_1.bin", local_dir=".")
+    llm = LlamaCpp(model_path="ggml-vic7b-q5_1.bin", n_ctx=2048, n_threads=psutil.cpu_count(logical=False))
+    #if not os.path.exists(offload_path):
+    #    os.makedirs(offload_path)
+    #llm = HuggingFacePipeline.from_model_id(model_id="lmsys/fastchat-t5-3b-v1.0",
+    #                                        task="text2text-generation",
+    #                                        model_kwargs={"max_length":512,
+    #                                                      "device_map":"auto",
+    #                                                      "offload_folder":"offload"
+    #                                                      }
+    #                                        )
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm,
         vectorstore.as_retriever(),
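One wrinkle worth flagging in the new version: the langchain.chains import of ConversationalRetrievalChain is commented out, yet ConversationalRetrievalChain.from_llm is still called at the end of the hunk, so that import has to stay active or get_chain raises a NameError. Assuming the import is restored, a chain built this way is driven with a question plus the running chat history. The sketch below is illustrative only, not part of this commit; the toy Chroma store and the HuggingFaceEmbeddings backend (which needs sentence-transformers installed) are stand-ins for the app's real PDF ingestion.

# Illustrative usage sketch (not part of this commit). The one-document
# Chroma store and HuggingFaceEmbeddings are assumptions standing in for
# the app's real ingestion pipeline.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from query_data import get_chain

vectorstore = Chroma.from_texts(
    ["llama.cpp serves quantized GGML models on CPU within a small RAM budget."],
    embedding=HuggingFaceEmbeddings(),
)
chain = get_chain(vectorstore)

chat_history = []
result = chain({"question": "Why run a quantized model?", "chat_history": chat_history})
print(result["answer"])
chat_history.append(("Why run a quantized model?", result["answer"]))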
requirements.txt CHANGED
@@ -1,9 +1,8 @@
 langchain
-typing-extensions>=4.5.0
+typing-extensions
 streamlit_chat
 pypdf
+llama-cpp-python
 chromadb
-tensorflow_text
-huggingface-hub
 accelerate
 transformers
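A note on the dependency swap: llama-cpp-python compiles llama.cpp from source at pip install time, so the build environment needs a working C/C++ toolchain. huggingface-hub disappears as a direct requirement but is still pulled in transitively by transformers, which is what keeps the new hf_hub_download import working. A quick smoke test, assuming ggml-vic7b-q5_1.bin has already been downloaded, confirms the quantized model loads and generates within a constrained memory budget:

# Illustrative smoke test (assumes the model file is already in the
# working directory). Loads the quantized model directly through
# llama-cpp-python and runs a short completion.
from llama_cpp import Llama

llm = Llama(model_path="ggml-vic7b-q5_1.bin", n_ctx=2048)
out = llm("Q: What does llama.cpp do? A:", max_tokens=32)
print(out["choices"][0]["text"])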