Ferdi committed on
Commit
5f0df75
1 Parent(s): c357dc5
Files changed (7)
  1. Dockerfile +24 -0
  2. requirements.txt +10 -0
  3. src/app.py +61 -0
  4. src/conversation.py +55 -0
  5. src/setup.py +16 -0
  6. src/utils.py +58 -0
  7. src/vector_index.py +61 -0
Dockerfile ADDED
@@ -0,0 +1,24 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9-slim
+
+ RUN useradd -m -u 1000 user
+ USER user
+
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ # Set the working directory in the container
+ WORKDIR $HOME/src/app
+
+ # Install any needed packages specified in requirements.txt
+ COPY --chown=user requirements.txt ./
+ RUN pip install -r requirements.txt
+
+ # Copy the rest of your application's code
+ COPY --chown=user ./src .
+
+ # Make port 7860 available to the world outside this container
+ EXPOSE 7860
+
+ # Run app.py when the container launches
+ CMD ["python", "./app.py"]
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ docarray==0.39.1
+ langchain-pinecone==0.1.5
+ sentence-transformers==2.3.1
+ gradio==4.8.0
+ langchain==0.1.5
+ openai==1.3.8
+ pypdf==3.17.2
+ docx2txt==0.8
+ tiktoken==0.5.2
+ transformers==4.36.0
src/app.py ADDED
@@ -0,0 +1,61 @@
+ import gradio as gr
+ from utils import *
+
+ with gr.Blocks(gr.themes.Soft(primary_hue=gr.themes.colors.slate, secondary_hue=gr.themes.colors.purple)) as demo:
+     with gr.Row():
+
+         with gr.Column(scale=1, variant='panel'):
+             # gr.HTML(f"<img src='file/logo.png' width='100' height='100'>")
+             files = gr.File(type="filepath", file_count="multiple")
+             with gr.Row(equal_height=True):
+                 vector_index_btn = gr.Button('Create vector store', variant='primary', scale=1)
+                 vector_index_msg_out = gr.Textbox(show_label=False, lines=1, scale=1, placeholder="Creating vector store ...")
+
+             instruction = gr.Textbox(label="System instruction", lines=3, value="Use the following pieces of context to answer the question at the end. Generate the answer based on the given context only. If you do not find any information related to the question in the given context, just say that you don't know, don't try to make up an answer. Keep your answer expressive.")
+
+             with gr.Accordion(label="Text generation tuning parameters"):
+                 temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.1, step=0.05)
+                 max_new_tokens = gr.Slider(label="max_new_tokens", minimum=1, maximum=4096, value=1024, step=1)
+                 k_context = gr.Slider(label="k_context", minimum=1, maximum=15, value=5, step=1)
+
+             vector_index_btn.click(upload_and_create_vector_store, inputs=[files], outputs=vector_index_msg_out)
+
+         with gr.Column(scale=1, variant='panel'):
+             # Earlier variant with a model dropdown, kept commented out for reference:
+             # with gr.Row(equal_height=True):
+             #     with gr.Column(scale=1):
+             #         llm = gr.Dropdown(choices=["gpt-3.5-turbo", "gpt-3.5-turbo-instruct", "gpt-3.5-turbo-16k", "gpt-4", "gpt-4-32k"],
+             #                           label="Select the model")
+             #     with gr.Column(scale=1):
+             #         model_load_btn = gr.Button('Load model', variant='primary', scale=2)
+             #         load_success_msg = gr.Textbox(show_label=False, lines=1, placeholder="Model loading ...")
+             with gr.Row(equal_height=True):
+                 model_load_btn = gr.Button('Load model', variant='primary', scale=2)
+                 load_success_msg = gr.Textbox(show_label=False, lines=1, placeholder="Model loading ...")
+
+             chatbot = gr.Chatbot([], elem_id="chatbot", label='Chatbox', height=600)
+
+             txt = gr.Textbox(label="Question", lines=2, placeholder="Enter your question and press shift+enter")
+
+             with gr.Row():
+
+                 with gr.Column(scale=1):
+                     submit_btn = gr.Button('Submit', variant='primary', size='sm')
+
+                 with gr.Column(scale=1):
+                     clear_btn = gr.Button('Clear', variant='stop', size='sm')
+
+             model_load_btn.click(load_models, [], load_success_msg, api_name="load_models")
+
+             txt.submit(add_text, [chatbot, txt], [chatbot, txt]).then(
+                 bot, [chatbot, instruction, temperature, max_new_tokens, k_context], chatbot)
+             submit_btn.click(add_text, [chatbot, txt], [chatbot, txt]).then(
+                 bot, [chatbot, instruction, temperature, max_new_tokens, k_context], chatbot).then(
+                 clear_cuda_cache, None, None
+             )
+
+             clear_btn.click(lambda: None, None, chatbot, queue=False)
+
+ if __name__ == '__main__':
+     # demo.queue(concurrency_count=3)
+     demo.launch(server_name="0.0.0.0", server_port=7860, show_api=False)
src/conversation.py ADDED
@@ -0,0 +1,55 @@
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.chat_models import ChatOpenAI
+ from langchain.prompts import PromptTemplate
+ from pinecone import Pinecone as PineconeClient
+ from langchain_community.vectorstores import Pinecone
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+ import os
+
+ openai_api_key = os.environ.get("OPENAI_API_KEY")
+
+ class Conversation_RAG:
+     def __init__(self, model_name="gpt-3.5-turbo"):
+         self.model_name = model_name
+
+     def get_vectordb(self):
+         # Connect to the existing index through the v3 Pinecone client
+         pc = PineconeClient(api_key=os.environ.get("PINECONE_API_KEY"))
+         index = pc.Index(os.environ.get("PINECONE_INDEX"))
+         embeddings = HuggingFaceEmbeddings(
+             model_name="sentence-transformers/all-mpnet-base-v2",
+         )
+         vectordb = Pinecone(index, embeddings, "text")
+
+         return vectordb
+
+     def create_model(self, max_new_tokens=512, temperature=0.1):
+         llm = ChatOpenAI(
+             openai_api_key=openai_api_key,
+             model_name=self.model_name,
+             temperature=temperature,
+             max_tokens=max_new_tokens,
+         )
+
+         return llm
+
+     def create_conversation(self, model, vectordb, k_context=5, instruction="Use the following pieces of context to answer the question at the end. Generate the answer based on the given context only. If you do not find any information related to the question in the given context, just say that you don't know, don't try to make up an answer. Keep your answer expressive."):
+
+         template = instruction + """
+         context:\n
+         {context}\n
+         data: {question}\n
+         """
+
+         # Only {context} and {question} are interpolated; the instruction is baked into the template string above.
+         QCA_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
+
+         qa = ConversationalRetrievalChain.from_llm(
+             llm=model,
+             chain_type='stuff',
+             retriever=vectordb.as_retriever(search_kwargs={"k": k_context}),
+             combine_docs_chain_kwargs={"prompt": QCA_PROMPT},
+             get_chat_history=lambda h: h,
+             verbose=True
+         )
+         return qa
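
For reference, a minimal sketch of driving Conversation_RAG directly (assuming the OPENAI_API_KEY, PINECONE_API_KEY, and PINECONE_INDEX environment variables are set and the index is already populated; the question string is illustrative):

from conversation import Conversation_RAG

conv_rag = Conversation_RAG(model_name="gpt-3.5-turbo")
vectordb = conv_rag.get_vectordb()                                # connect to the Pinecone index
llm = conv_rag.create_model(max_new_tokens=256, temperature=0.1)  # build the ChatOpenAI model
qa = conv_rag.create_conversation(llm, vectordb, k_context=3)     # assemble the retrieval chain

# The chain expects a question plus a (possibly empty) formatted chat history.
res = qa({"question": "What is the uploaded document about?", "chat_history": ""})
print(res["answer"])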
src/setup.py ADDED
@@ -0,0 +1,16 @@
+ from conversation import Conversation_RAG
+ from vector_index import *
+
+ class ModelSetup:
+     def __init__(self, model_name):
+
+         self.model_name = model_name
+
+     def setup(self):
+
+         conv_rag = Conversation_RAG(self.model_name)
+
+         self.vectordb = conv_rag.get_vectordb()
+         self.pipeline = conv_rag.create_model()
+
+         return "Model Setup Complete"
src/utils.py ADDED
@@ -0,0 +1,58 @@
+ import gc
+ from conversation import Conversation_RAG
+ from vector_index import *
+ from setup import ModelSetup
+
+ def load_models(model_name="gpt-3.5-turbo"):
+     global conv_qa
+     conv_qa = Conversation_RAG(model_name)
+     global model_setup
+     model_setup = ModelSetup(model_name)
+     success_prompt = model_setup.setup()
+     return success_prompt
+
+ def get_chat_history(inputs):
+
+     res = []
+     for human, ai in inputs:
+         res.append(f"Human:{human}\nAssistant:{ai}")
+     return "\n".join(res)
+
+ def add_text(history, text):
+
+     history = history + [[text, None]]
+     return history, ""
+
+
+ def bot(history,
+         instruction="Use the following pieces of context to answer the question at the end. Generate the answer based on the given context only if you find the answer in the context. If you do not find any information related to the question in the given context, just say that you don't know, don't try to make up an answer. Keep your answer expressive.",
+         temperature=0.1,
+         max_new_tokens=512,
+         k_context=5,
+         ):
+
+     model = conv_qa.create_model(max_new_tokens=max_new_tokens, temperature=temperature)
+
+     qa = conv_qa.create_conversation(
+         model=model,
+         vectordb=model_setup.vectordb,
+         k_context=k_context,
+         instruction=instruction
+     )
+
+     chat_history_formatted = get_chat_history(history[:-1])
+     res = qa(
+         {
+             'question': history[-1][0],
+             'chat_history': chat_history_formatted
+         }
+     )
+
+     history[-1][1] = res['answer']
+     return history
+
+ def clear_cuda_cache():
+     # No GPU is used here (generation goes through the OpenAI API),
+     # so this only triggers Python garbage collection.
+     gc.collect()
+     return None
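
Note that conv_qa and model_setup are module-level globals, so load_models must run before bot. A minimal sketch of exercising this flow outside Gradio (same environment-variable assumptions as above; the question is illustrative):

from utils import load_models, add_text, bot

print(load_models("gpt-3.5-turbo"))      # sets up the vector store handle and the LLM
history, _ = add_text([], "Summarize the indexed documents.")
history = bot(history, temperature=0.1, max_new_tokens=256, k_context=3)
print(history[-1][1])                    # the answer written back into the chat history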
src/vector_index.py ADDED
@@ -0,0 +1,61 @@
+ from pinecone import Pinecone
+ from langchain_community.document_loaders import Docx2txtLoader, PyPDFLoader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+ import os, uuid
+
+ def create_vector_store_index(file_path):
+
+     file_path_split = file_path.split(".")
+     file_type = file_path_split[-1].rstrip('/')
+
+     if file_type == 'docx':
+         loader = Docx2txtLoader(file_path)
+
+     elif file_type == 'pdf':
+         loader = PyPDFLoader(file_path)
+
+     else:
+         raise ValueError(f"Unsupported file type: {file_type}")
+
+     pages = loader.load()
+
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=512,
+         chunk_overlap=128)
+
+     docs = text_splitter.split_documents(pages)
+
+     pc = Pinecone(
+         api_key=os.environ.get("PINECONE_API_KEY"),
+     )
+
+     index = pc.Index(os.environ.get("PINECONE_INDEX"))
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name="sentence-transformers/all-mpnet-base-v2",
+     )
+
+     # Embed and upsert the chunks in batches
+     batch_size = 32
+
+     for i in range(0, len(docs), batch_size):
+         i_end = min(len(docs), i + batch_size)
+         batch = docs[i:i_end]
+         ids = [str(uuid.uuid4()) for _ in batch]
+         texts = [x.page_content for x in batch]
+         embeds = embeddings.embed_documents(texts)
+         metadata = [
+             {'text': x.page_content, **x.metadata} for x in batch
+         ]
+         index.upsert(vectors=zip(ids, embeds, metadata))
+
+     return "Vector store index is created."
+
+
+ def upload_and_create_vector_store(files):
+
+     for file in files:
+         index_success_msg = create_vector_store_index(file)
+
+     return index_success_msg
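
A hypothetical usage sketch for the indexing helpers (file names are illustrative; PINECONE_API_KEY and PINECONE_INDEX must be set, and only .docx and .pdf inputs are supported):

from vector_index import create_vector_store_index, upload_and_create_vector_store

print(create_vector_store_index("report.pdf"))               # index a single file
print(upload_and_create_vector_store(["a.pdf", "b.docx"]))   # index several files, as the app's upload button does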