Spaces: Build error
heikowagner committed
Commit 0cd40c7 • 1 Parent(s): 60a70a9
add cpu model

Files changed:
- Dockerfile +26 -3
- app/app.py +4 -2
- app/exploration.py +29 -1
- app/load_model.py +3 -2
- app/requirements.txt → requirements.txt +0 -0
Dockerfile
CHANGED
@@ -1,11 +1,34 @@
+#Navigate to your user folder cd $env:USERPROFILE\AppData\Local\Docker\wsl\data
+#Enter the following command resize-vhd -Path .\ext4.vhdx -SizeBytes 300GB, after that I was able to continue building with docker-compose!
+
+FROM python:latest AS builder
+RUN apt update -y
+RUN apt install -y git git-lfs make gcc g++ libgmp-dev libmpfr-dev libmpc-dev
+RUN git lfs install
+RUN git clone https://github.com/ggerganov/llama.cpp
+RUN cd llama.cpp && make
+RUN git clone https://huggingface.co/nyanko7/LLaMA-7B
+RUN ls -la
+RUN cp -r ./LLaMA-7B ./llama.cpp/models
+RUN ls -la ./llama.cpp/models/LLaMA-7B
+# convert the 7B model to ggml FP16 format
+WORKDIR llama.cpp
+RUN python3 -m pip install -r requirements.txt
+RUN python3 convert.py ./models/LLaMA-7B
+# quantize the model to 4-bits (using q4_0 method)
+RUN mkdir ./models/7B/
+RUN ./quantize ./models/LLaMA-7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
+
 FROM tensorflow/tensorflow:latest-gpu
 WORKDIR /app
+COPY --from=builder /llama.cpp//models/7B/ ./mymodels/LLaMA-7B
 # RUN apt-get upgrade -y
-RUN apt
-RUN apt
+RUN apt update -y
+RUN apt install -y git git-lfs
 RUN apt install -y make wget git gcc g++ lhasa libgmp-dev libmpfr-dev libmpc-dev flex bison gettext texinfo ncurses-dev autoconf rsync
-COPY ./
+COPY ./requirements.txt requirements.txt
 RUN pip install -r requirements.txt
+COPY ./app .
 #RUN python load_docs.py
 RUN --mount=type=secret,id=OPENAI_API_KEY \
     cat /run/secrets/OPENAI_API_KEY > .openaiapikey
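The new builder stage compiles llama.cpp, converts the LLaMA-7B checkout to ggml FP16, quantizes it to q4_0, and the runtime stage copies only the quantized weights into ./mymodels/LLaMA-7B. The secret mount writes the OpenAI key to .openaiapikey at build time; with BuildKit the secret is supplied as e.g. `docker build --secret id=OPENAI_API_KEY,src=/path/to/keyfile .`. A minimal sketch of how runtime code could consume that file (the reading logic is an assumption for illustration, not code from this repo):

import os

# .openaiapikey is produced by the RUN --mount=type=secret step in the Dockerfile
with open(".openaiapikey") as f:
    os.environ["OPENAI_API_KEY"] = f.read().strip()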
app/app.py
CHANGED
@@ -19,7 +19,7 @@ else:
 
 model_type = st.selectbox(
     'Select the Documents to be used to answer your question',
-    ('OpenAI', '
+    ('OpenAI', 'decapoda-research/llama-7b-hf (gpu+cpu)', 'llama-7b 4bit (cpu only)',) )
 
 if model_type=='OpenAI':
     if 'openai_key' not in st.session_state:
@@ -30,9 +30,11 @@ else:
     else:
         os.environ["OPENAI_API_KEY"] = st.session_state.openai_key
         llm= load_model.load_openai_model()
-
+elif model_type=='decapoda-research/llama-7b-hf (gpu+cpu)':
     # Add more models here
     llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
+else:
+    llm = load_model.load_cpu_model()
 
 
 collections = ut.retrieve_collections()
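The selectbox now drives a three-way dispatch: 'OpenAI' keeps the hosted model, the '(gpu+cpu)' label loads the Hugging Face checkpoint, and any other choice falls through to the new llama.cpp CPU path. A condensed, self-contained sketch of that branching (the Streamlit session-state handling for the API key is omitted here; it is unchanged by the commit):

import streamlit as st
import load_model

model_type = st.selectbox(
    'Select the Documents to be used to answer your question',
    ('OpenAI', 'decapoda-research/llama-7b-hf (gpu+cpu)', 'llama-7b 4bit (cpu only)'))

if model_type == 'OpenAI':
    llm = load_model.load_openai_model()  # expects OPENAI_API_KEY in the environment
elif model_type == 'decapoda-research/llama-7b-hf (gpu+cpu)':
    llm = load_model.load_gpu_model("decapoda-research/llama-7b-hf")
else:
    llm = load_model.load_cpu_model()  # 4-bit ggml weights via llama.cpp, CPU only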
app/exploration.py
CHANGED
@@ -20,4 +20,32 @@ client.create_collection(collection, embedding_function=ef, metadata={"loaded_doc
 
 
 # %%
-client.list_collections()
+my_col = client.list_collections()
+
+# %%
+my_col.embedding_function
+
+# %%
+from langchain.vectorstores import Chroma
+import load_model
+
+from load_model import load_embedding
+
+persist_directory = load_model.persist_directory
+
+ef = load_embedding("hkunlp/instructor-large")
+vectorstore = Chroma(
+    collection_name="papers",
+    embedding_function=ef,
+    persist_directory=persist_directory,
+)
+
+# %%
+query = "What did the president say about Ketanji Brown Jackson"
+docs = vectorstore.similarity_search(query)
+
+
+# %%
+docs
+# %%
+vectorstore.similarity_search_with_score(query)
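For reference, similarity_search returns bare Documents while similarity_search_with_score pairs each Document with a distance score (lower is closer under Chroma's default metric). A short sketch of consuming the scored results, continuing from the vectorstore defined above:

# each result is a (Document, score) tuple, already ordered best-first
for doc, score in vectorstore.similarity_search_with_score(query):
    print(f"{score:.3f}", doc.metadata, doc.page_content[:80])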
app/load_model.py
CHANGED
@@ -16,6 +16,7 @@ import os
 from langchain.chains import RetrievalQA
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.llms import OpenAI
+import multiprocessing
 
 from chromadb.config import Settings
 import chromadb
@@ -30,12 +31,12 @@ persist_directory = current_path + "/VectorStore"
 @st.cache_resource
 def load_cpu_model():
     """Does not work atm, bc cpu model is not persisted"""
-    model_path= "./
+    model_path= "./mymodels/LLaMA-7B/ggml-model-q4_0.bin"
     device_map = {"": int(os.environ.get("LOCAL_RANK") or 0)}
     llm = LlamaCpp(
         model_path=model_path,
         n_ctx=6000,
-        n_threads=
+        n_threads=multiprocessing.cpu_count(),
         temperature=0.6,
         top_p=0.95
     )
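With the quantized weights baked into the image, load_cpu_model can now resolve its model_path. A hedged sketch of wiring the CPU model into a retrieval chain, reusing the "papers" store from app/exploration.py (this combination is illustrative; the app itself routes through app.py):

from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
import load_model
from load_model import load_embedding

# the same persisted Chroma store that app/exploration.py queries
vectorstore = Chroma(
    collection_name="papers",
    embedding_function=load_embedding("hkunlp/instructor-large"),
    persist_directory=load_model.persist_directory,
)

llm = load_model.load_cpu_model()  # LlamaCpp over ./mymodels/LLaMA-7B/ggml-model-q4_0.bin
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                 retriever=vectorstore.as_retriever())
print(qa.run("What did the president say about Ketanji Brown Jackson"))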
app/requirements.txt → requirements.txt
RENAMED
File without changes