Spaces: Build error
heikowagner committed
Commit 0cd40c7 • 1 Parent(s): 60a70a9
add cpu model

Files changed:
- Dockerfile +26 -3
- app/app.py +4 -2
- app/exploration.py +29 -1
- app/load_model.py +3 -2
- app/requirements.txt → requirements.txt +0 -0
Dockerfile
CHANGED
@@ -1,11 +1,34 @@
+#Navigate to your user folder cd $env:USERPROFILE\AppData\Local\Docker\wsl\data
+#Enter the following command resize-vhd -Path .\ext4.vhdx -SizeBytes 300GB, after that I was able to continue building with docker-compose!
+
+FROM python:latest AS builder
+RUN apt update -y
+RUN apt install -y git git-lfs make gcc g++ libgmp-dev libmpfr-dev libmpc-dev
+RUN git lfs install
+RUN git clone https://github.com/ggerganov/llama.cpp
+RUN cd llama.cpp && make
+RUN git clone https://huggingface.co/nyanko7/LLaMA-7B
+RUN ls -la
+RUN cp -r ./LLaMA-7B ./llama.cpp/models
+RUN ls -la ./llama.cpp/models/LLaMA-7B
+# convert the 7B model to ggml FP16 format
+WORKDIR llama.cpp
+RUN python3 -m pip install -r requirements.txt
+RUN python3 convert.py ./models/LLaMA-7B
+# quantize the model to 4-bits (using q4_0 method)
+RUN mkdir ./models/7B/
+RUN ./quantize ./models/LLaMA-7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
+
 FROM tensorflow/tensorflow:latest-gpu
 WORKDIR /app
+COPY --from=builder /llama.cpp//models/7B/ ./mymodels/LLaMA-7B
 # RUN apt-get upgrade -y
-RUN apt
-RUN apt
+RUN apt update -y
+RUN apt install -y git git-lfs
 RUN apt install -y make wget git gcc g++ lhasa libgmp-dev libmpfr-dev libmpc-dev flex bison gettext texinfo ncurses-dev autoconf rsync
-COPY ./
+COPY ./requirements.txt requirements.txt
 RUN pip install -r requirements.txt
+COPY ./app .
 #RUN python load_docs.py
 RUN --mount=type=secret,id=OPENAI_API_KEY \
     cat /run/secrets/OPENAI_API_KEY > .openaiapikey
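The new builder stage compiles llama.cpp, converts the LLaMA-7B checkout to ggml FP16, quantizes it to q4_0, and the runtime stage copies only the quantized weights into ./mymodels/LLaMA-7B. The secret mount writes the OpenAI key to .openaiapikey at build time; with BuildKit the secret is supplied as e.g. `docker build --secret id=OPENAI_API_KEY,src=/path/to/keyfile .`. A minimal sketch of how runtime code could consume that file (the reading logic is an assumption for illustration, not code from this repo):

import os

# .openaiapikey is produced by the RUN --mount=type=secret step in the Dockerfile
with open(".openaiapikey") as f:
    os.environ["OPENAI_API_KEY"] = f.read().strip()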
app/app.py
CHANGED
@@ -19,7 +19,7 @@ else:
 
 model_type = st.selectbox(
     'Select the Documents to be used to answer your question',
-    ('OpenAI', '
+    ('OpenAI', 'decapoda-research/llama-7b-hf (gpu+cpu)', 'llama-7b 4bit (cpu only)',) )
 
 if model_type=='OpenAI':
     if 'openai_key' not in st.session_state:
@@ -30,9 +30,11 @@ else:
     else:
         os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
         llm= load_model.load_openai_model()
-
+elif model_type=='decapoda-research/llama-7b-hf (gpu+cpu)':
     # Add more models here
     llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
+else:
+    llm = load_model.load_cpu_model()
 
 
 collections = ut.retrieve_collections()
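The selectbox now drives a three-way dispatch: 'OpenAI' keeps the hosted model, the '(gpu+cpu)' label loads the Hugging Face checkpoint, and any other choice falls through to the new llama.cpp CPU path. A condensed, self-contained sketch of that branching (the Streamlit session-state handling for the API key is omitted here; it is unchanged by the commit):

import streamlit as st
import load_model

model_type = st.selectbox(
    'Select the Documents to be used to answer your question',
    ('OpenAI', 'decapoda-research/llama-7b-hf (gpu+cpu)', 'llama-7b 4bit (cpu only)'))

if model_type == 'OpenAI':
    llm = load_model.load_openai_model()  # expects OPENAI_API_KEY in the environment
elif model_type == 'decapoda-research/llama-7b-hf (gpu+cpu)':
    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
else:
    llm = load_model.load_cpu_model()  # 4-bit ggml weights via llama.cpp, CPU only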
app/exploration.py
CHANGED
@@ -20,4 +20,32 @@ client.create_collection(collection, embedding_function=ef, metadata={"loaded_doc
 
 
 # %%
-client.list_collections()
+my_col = client.list_collections()
+
+# %%
+my_col.embedding_function
+
+# %%
+from langchain.vectorstores import Chroma
+import load_model
+
+from load_model import load_embedding
+
+persist_directory = load_model.persist_directory
+
+ef = load_embedding("hkunlp/instructor-large")
+vectorstore = Chroma(
+    collection_name="papers",
+    embedding_function=ef,
+    persist_directory=persist_directory,
+)
+
+# %%
+query = "What did the president say about Ketanji Brown Jackson"
+docs = vectorstore.similarity_search(query)
+
+
+# %%
+docs
+# %%
+vectorstore.similarity_search_with_score(query)
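For reference, similarity_search returns bare Documents while similarity_search_with_score pairs each Document with a distance score (lower is closer under Chroma's default metric). A short sketch of consuming the scored results, continuing from the vectorstore defined above:

# each result is a (Document, score) tuple, already ordered best-first
for doc, score in vectorstore.similarity_search_with_score(query):
    print(f"{score:.3f}", doc.metadata, doc.page_content[:80])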
app/load_model.py
CHANGED
@@ -16,6 +16,7 @@ import os
 from langchain.chains import RetrievalQA
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.llms import OpenAI
+import multiprocessing
 
 from chromadb.config import Settings
 import chromadb
@@ -30,12 +31,12 @@ persist_directory = current_path + "/VectorStore"
 @st.cache_resource
 def load_cpu_model():
     """Does not work atm, bc cpu model is not persisted"""
-    model_path= "./
+    model_path= "./mymodels/LLaMA-7B/ggml-model-q4_0.bin"
     device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
     llm = LlamaCpp(
         model_path=model_path,
         n_ctx=6000,
-        n_threads=
+        n_threads=multiprocessing.cpu_count(),
         temperature=0.6,
         top_p=0.95
     )
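With the quantized weights baked into the image, load_cpu_model can now resolve its model_path. A hedged sketch of wiring the CPU model into a retrieval chain, reusing the "papers" store from app/exploration.py (this combination is illustrative; the app itself routes through app.py):

from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
import load_model
from load_model import load_embedding

# the same persisted Chroma store that app/exploration.py queries
vectorstore = Chroma(
    collection_name="papers",
    embedding_function=load_embedding("hkunlp/instructor-large"),
    persist_directory=load_model.persist_directory,
)

llm = load_model.load_cpu_model()  # LlamaCpp over ./mymodels/LLaMA-7B/ggml-model-q4_0.bin
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=vectorstore.as_retriever())
print(qa.run("What did the president say about Ketanji Brown Jackson"))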
app/requirements.txt → requirements.txt
RENAMED
File without changes