Spaces:

heikowagner
/

GPT-Docker

Build error

App Files Community

heikowagner committed on May 3, 2023

Commit

aeb550e

1 Parent(s): 39b12fb

kkk

Browse files

Files changed (6) hide show

Dockerfile +2 -2
app/VectorStore/chroma-collections.parquet +1 -1
app/app.py +1 -1
app/load_model.py +2 -2
app/load_vectors.py +1 -1
app/utils.py +4 -12

Dockerfile CHANGED Viewed

@@ -9,14 +9,14 @@ RUN pip install -r requirements.txt
 #RUN python load_docs.py
 RUN --mount=type=secret,id=OPENAI_API_KEY \
   cat /run/secrets/OPENAI_API_KEY > .openaiapikey
-RUN mkdir /app/.cache
 RUN mkdir /nltk_data
 RUN mkdir /VectorStore
 RUN ls -la
 RUN python run.py
 RUN chmod 777 /VectorStore
 RUN chmod 777 /nltk_data
-RUN chmod 777 /app/.cache
 CMD ["streamlit", "run", "app.py", "--server.port=7860"]
 #CMD ls -la
 EXPOSE 7860

 #RUN python load_docs.py
 RUN --mount=type=secret,id=OPENAI_API_KEY \
   cat /run/secrets/OPENAI_API_KEY > .openaiapikey
+RUN mkdir /.cache
 RUN mkdir /nltk_data
 RUN mkdir /VectorStore
 RUN ls -la
 RUN python run.py
 RUN chmod 777 /VectorStore
 RUN chmod 777 /nltk_data
+RUN chmod 777 /.cache
 CMD ["streamlit", "run", "app.py", "--server.port=7860"]
 #CMD ls -la
 EXPOSE 7860

app/VectorStore/chroma-collections.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79cf0a7bde715ca62bcfb8bf4f9a737f550dc282abdbde3a3d861114be54c984
 size 967

 version https://git-lfs.github.com/spec/v1
+oid sha256:9770f8b53664f3a358faee66aa23720c091943c176225f0bf2487bd1767d872a
 size 967

app/app.py CHANGED Viewed

@@ -44,7 +44,7 @@ else:
     st.write('You selected:', option['name'])
-    chain = load_model.create_chain(llm, collection=option['name'], model_name=option['model_name'])
     try:
         query = st.text_area('Ask a question:', 'Hallo how are you today?')
         result = chain({"query": query})

     st.write('You selected:', option['name'])
+    chain = load_model.create_chain(llm, collection=option['name'], model_name=option['model_name'], metadata= option['metadata'])
     try:
         query = st.text_area('Ask a question:', 'Hallo how are you today?')
         result = chain({"query": query})

app/load_model.py CHANGED Viewed

@@ -88,7 +88,7 @@ def load_openai_model():
 def load_openai_embedding():
     return OpenAIEmbeddings()
-@st.cache_resource
 def load_embedding(model_name):
     embeddings = HuggingFaceInstructEmbeddings(
             query_instruction="Represent the query for retrieval: ",
@@ -113,7 +113,7 @@ def load_vectorstore(model_name, collection, metadata):
         )
         return vectorstore
-def create_chain(_llm, collection, model_name, metadata=None):
     vectorstore = load_vectorstore(model_name, collection, metadata=metadata)
     retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
     chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

 def load_openai_embedding():
     return OpenAIEmbeddings()
+#@st.cache_resource
 def load_embedding(model_name):
     embeddings = HuggingFaceInstructEmbeddings(
             query_instruction="Represent the query for retrieval: ",
         )
         return vectorstore
+def create_chain(_llm, collection, model_name, metadata):
     vectorstore = load_vectorstore(model_name, collection, metadata=metadata)
     retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
     chain = RetrievalQA.from_chain_type(llm=_llm, chain_type="stuff", retriever=retriever, return_source_documents=True)

app/load_vectors.py CHANGED Viewed

@@ -52,7 +52,7 @@ def create_and_add(collection_name, sub_docs, model_name, metadata):
     vectorstore2 = load_vectorstore(model_name, collection_name, metadata = metadata)
     print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
-    return vectorstore
 def load_from_file(files):

     vectorstore2 = load_vectorstore(model_name, collection_name, metadata = metadata)
     print( vectorstore2.similarity_search_with_score(query="What are AXAs green Goals?", k=4) )
+    return True
 def load_from_file(files):

app/utils.py CHANGED Viewed

@@ -22,7 +22,7 @@ def format_result_set(result):
         for document in source_documents:
             st.write(format_document(document))
-#@st.cache_resource
 def get_chroma_client():
     return chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
                                     persist_directory=persist_directory
@@ -31,9 +31,7 @@ def get_chroma_client():
 def retrieve_collections():
     client = get_chroma_client()
     all_collections = client.list_collections()
-    print(all_collections)
-    print(all_collections[0].metadata)
-    collections = tuple( [{'name': collection.name, 'model_name': collection.metadata['model_name']} for collection in all_collections] )
     return collections
 def load_files():
@@ -69,11 +67,7 @@ def load_files():
             if st.button('Upload'):
                 docs = load_from_file(uploaded_files)
                 sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
-                print(sub_docs)
-                #create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], {"model_name": selected_collection['model_name']})
-                create_and_add(selected_collection["name"], sub_docs, "hkunlp/instructor-large", metadata={"model_name": "hkunlp/instructor-large"})
-                uploaded_files=None
                 st.write("Upload succesful")
         else:
             st.write('Urls of Source Documents (Comma separated):')
@@ -84,9 +78,7 @@ def load_files():
             if st.button('Upload'):
                 docs = load_from_web(urls)
                 sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
-                print(selected_collection['model_name'])
-                create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], {"model_name": selected_collection['model_name']})
-                uploaded_files=None
                 st.write("Upload succesful")
     else:
         collection = st.text_area('Name of your new collection:', '')

         for document in source_documents:
             st.write(format_document(document))
+@st.cache_resource
 def get_chroma_client():
     return chromadb.Client(Settings(chroma_db_impl="duckdb+parquet",
                                     persist_directory=persist_directory
 def retrieve_collections():
     client = get_chroma_client()
     all_collections = client.list_collections()
+    collections = tuple( [{'name': collection.name, 'model_name': collection.metadata['model_name'], "metadata": collection.metadata} for collection in all_collections] )
     return collections
 def load_files():
             if st.button('Upload'):
                 docs = load_from_file(uploaded_files)
                 sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
+                vec1 = create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], selected_collection['metadata'])
                 st.write("Upload succesful")
         else:
             st.write('Urls of Source Documents (Comma separated):')
             if st.button('Upload'):
                 docs = load_from_web(urls)
                 sub_docs = load_and_split(docs, chunk_size=int(chunk_size))
+                vec2 = create_and_add(selected_collection["name"], sub_docs, selected_collection['model_name'], selected_collection['metadata'])
                 st.write("Upload succesful")
     else:
         collection = st.text_area('Name of your new collection:', '')