Ritesh-hf committed on
Commit
91aeb7f
·
1 Parent(s): f718fe0

initial commit

Browse files
Files changed (4) hide show
  1. .env +4 -0
  2. Dockerfile +16 -0
  3. app.py +163 -0
  4. requirements.txt +99 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ USER_AGENT='myagent'
2
+ GROQ_API_KEY="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"
3
+ PINECONE_API_KEY="ca8e6a33-7355-453f-ad4b-80c8a1c6a9c7"
4
+ SECRET_KEY="b0*1x^y@9$)w%v+k=p!8xp@4bkt37s&b8+uf%1=mh+v1=@ybsh"
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as an unprivileged user (uid 1000) and expose that user's pip-installed
# console scripts (such as gunicorn) on PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the sources so this layer can be reused
# when only application code changes.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# The Flask application object is defined in app.py, so the WSGI target is
# "app:app" (this commit contains no main.py). Flask-SocketIO's deployment
# guide calls for a single gunicorn worker; without eventlet/gevent installed,
# a threaded worker is the documented way to serve it.
CMD ["gunicorn", "-w", "1", "--threads", "100", "-b", "0.0.0.0:7860", "app:app"]
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ load_dotenv(".env")
4
+
5
# load_dotenv() has already merged .env into os.environ, so these variables
# only need to be validated. Fail fast with a readable message instead of the
# TypeError that assigning a missing (None) value to os.environ would raise.
for _required_var in ("USER_AGENT", "GROQ_API_KEY"):
    if os.getenv(_required_var) is None:
        raise RuntimeError(
            f"Missing required environment variable: {_required_var}"
        )
del _required_var

os.environ["TOKENIZERS_PARALLELISM"] = "true"
8
+
9
+ from langchain.chains import create_history_aware_retriever, create_retrieval_chain
10
+ from langchain.chains.combine_documents import create_stuff_documents_chain
11
+ from langchain_community.chat_message_histories import ChatMessageHistory
12
+ from langchain_community.document_loaders import WebBaseLoader
13
+ from langchain_core.chat_history import BaseChatMessageHistory
14
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
15
+ from langchain_core.runnables.history import RunnableWithMessageHistory
16
+
17
+ from pinecone import Pinecone
18
+ from pinecone_text.sparse import BM25Encoder
19
+
20
+ from langchain_huggingface import HuggingFaceEmbeddings
21
+ from langchain_community.retrievers import PineconeHybridSearchRetriever
22
+
23
+ from langchain_groq import ChatGroq
24
+
25
+ from flask import Flask, request
26
+ from flask_cors import CORS
27
+ from flask_limiter import Limiter
28
+ from flask_limiter.util import get_remote_address
29
+ from flask_socketio import SocketIO, emit
30
+
31
# Flask application with CORS enabled and a Socket.IO layer on top of it.
# NOTE(review): both CORS(app) and cors_allowed_origins="*" accept any
# origin -- confirm that this is intended for the deployed service.
app = Flask(__name__)
CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")

# Session-cookie policy and the signing key, applied in one update.
app.config.update(
    SESSION_COOKIE_SECURE=True,  # Use HTTPS
    SESSION_COOKIE_HTTPONLY=True,
    SESSION_COOKIE_SAMESITE='Lax',
    SECRET_KEY=os.getenv('SECRET_KEY'),
)
38
+
39
index_name = "traveler-demo-website-vectorstore"

# Connect to the Pinecone index, retrying exactly once if the first attempt
# fails. Only Exception is caught so that KeyboardInterrupt / SystemExit are
# never swallowed, and a second failure propagates to the caller.
for _attempt in range(2):
    try:
        pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        # connect to index
        pinecone_index = pc.Index(index_name)
        break
    except Exception:
        if _attempt == 1:
            raise
49
+
50
# Sparse encoder, loaded from the JSON file shipped next to the app.
bm25 = BM25Encoder().load("bm25_traveler_website.json")

# Dense embedding model; trust_remote_code is forwarded via model_kwargs.
embed_model = HuggingFaceEmbeddings(
    model_name="Alibaba-NLP/gte-large-en-v1.5",
    model_kwargs={"trust_remote_code": True},
)

# Hybrid retriever combining the dense embeddings and the sparse encoder
# against the Pinecone index.
retriever = PineconeHybridSearchRetriever(
    index=pinecone_index,
    embeddings=embed_model,
    sparse_encoder=bm25,
    alpha=0.5,
    top_k=20,
)

# Chat model used both for question rewriting and for answering.
llm = ChatGroq(
    model="llama-3.1-70b-versatile",
    temperature=0.1,
    max_tokens=1024,
    max_retries=2,
)
63
+
64
### Contextualize question ###
# System instruction asking the model to rewrite the latest user question so
# that it can be understood without the chat history.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is.
"""

# Prompt layout: system instruction, prior messages, then the new question.
_contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_messages)

history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)
81
+
82
+
83
# System instruction for the answering step; {context} is filled with the
# retrieved documents by the stuff-documents chain.
qa_system_prompt = """You are a highly skilled information retrieval assistant. Use the following pieces of retrieved context to answer the question. \
Provide links to sources provided in the answer. \
If you don't know the answer, just say that you don't know. \
Do not give extra long answers. \
When responding to queries, your responses should be comprehensive and well-organized. For each response: \

1. Provide Clear Answers \

2. Include Detailed References: \
- Include links to sources and any links or sites where there is a mentioned in the answer.
- Links to Sources: Provide URLs to credible sources where users can verify the information or explore further. \
- Downloadable Materials: Include links to any relevant downloadable resources if applicable. \
- Reference Sites: Mention specific websites or platforms that offer additional information. \

3. Formatting for Readability: \
- Bullet Points or Lists: Where applicable, use bullet points or numbered lists to present information clearly. \
- Emphasize Important Information: Use bold or italics to highlight key details. \

4. Organize Content Logically \

Do not include anything about context in the answer. \

{context}
"""

# Prompt layout: system instruction, prior messages, then the new question.
_qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(_qa_messages)

# Answering chain that receives the retrieved documents as {context}.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

# Full pipeline: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)
117
+
118
### Statefully manage chat history ###
# Maps a session id to that session's chat history object.
store = {}


def clean_temporary_data() -> None:
    """Remove every stored chat history.

    The module-level ``store`` dictionary is emptied in place. Assigning a new
    dict to the name ``store`` inside this function would only create a local
    variable and leave the shared dictionary untouched, so ``clear()`` is used
    instead; this also keeps any existing references to ``store`` valid.
    """
    store.clear()
123
+
124
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Histories live in the module-level ``store`` dictionary, so the same
    object is returned on every later call with the same id.
    """
    try:
        return store[session_id]
    except KeyError:
        # First message for this session: start an empty history and keep it.
        history = ChatMessageHistory()
        store[session_id] = history
        return history
128
+
129
+
130
# RAG chain wrapped with per-session message history. The history object for
# each call is obtained from get_session_history; the keys below name where
# the user input, the prior messages and the generated answer live.
_history_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
    "output_messages_key": "answer",
}
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    **_history_keys,
)
137
+
138
# Stream response to client
@socketio.on('message')
def handle_message(data):
    """Answer a client's question and stream the reply back over Socket.IO.

    Args:
        data: Mapping sent by the client. ``question`` holds the user's
            question; ``session_id`` (optional, default ``'abc123'``) selects
            which chat history the conversation is stored under.

    Each streamed piece of the answer is emitted as a ``'response'`` event to
    the requesting client only. If streaming fails before anything was sent,
    it is attempted one more time; any other failure is re-raised.
    """
    question = data.get('question')
    if not question:
        # Nothing to answer; do not invoke the chain with an empty input.
        return

    session_id = data.get('session_id', 'abc123')
    # Use the id supplied by the client so that separate sessions do not
    # share one conversation history.
    config = {"configurable": {"session_id": session_id}}

    # Stream only the "answer" field of the chain output.
    # NOTE(review): the unfiltered chain output presumably also carries the
    # retrieved documents and message objects, which are not plain JSON --
    # confirm that clients expect the answer text alone.
    chain = conversational_rag_chain.pick("answer")

    emitted = False
    for attempt in range(2):
        try:
            for chunk in chain.stream({"input": question}, config=config):
                emit('response', chunk, room=request.sid)
                emitted = True
            return
        except Exception:
            # Retrying after chunks were already sent would repeat the start
            # of the answer on the client, so only a failure that happened
            # before the first chunk is retried (once).
            if emitted or attempt == 1:
                raise
161
+
162
# Entry point for running the module directly (e.g. ``python app.py``).
# NOTE(review): debug=True is meant for local development -- confirm this
# path is never used to serve production traffic.
if __name__ == "__main__":
    socketio.run(app=app, debug=True)
requirements.txt ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ aiosignal==1.3.1
3
+ annotated-types==0.7.0
4
+ anyio==4.4.0
5
+ async-timeout==4.0.3
6
+ attrs==23.2.0
7
+ bidict==0.23.1
8
+ blinker==1.8.2
9
+ certifi==2024.7.4
10
+ charset-normalizer==3.3.2
11
+ click==8.1.7
12
+ dataclasses-json==0.6.7
13
+ distro==1.9.0
14
+ exceptiongroup==1.2.2
15
+ filelock==3.15.4
16
+ flask==3.0.3
17
+ Flask-Cors==4.0.1
18
+ Flask-SocketIO==5.3.6
19
+ frozenlist==1.4.1
20
+ fsspec==2024.6.1
21
+ greenlet==3.0.3
22
+ groq==0.9.0
23
+ h11==0.14.0
24
+ httpcore==1.0.5
25
+ httpx==0.27.0
26
+ huggingface-hub==0.24.2
27
+ idna==3.7
28
+ importlib-metadata==8.2.0
29
+ itsdangerous==2.2.0
30
+ jinja2==3.1.4
31
+ joblib==1.4.2
32
+ jsonpatch==1.33
33
+ jsonpointer==3.0.0
34
+ langchain==0.2.11
35
+ langchain-community==0.2.10
36
+ langchain-core==0.2.24
37
+ langchain-groq==0.1.6
38
+ langchain-huggingface==0.0.3
39
+ langchain-text-splitters==0.2.2
40
+ langsmith==0.1.93
41
+ MarkupSafe==2.1.5
42
+ marshmallow==3.21.3
43
+ mmh3==4.1.0
44
+ mpmath==1.3.0
45
+ multidict==6.0.5
46
+ mypy-extensions==1.0.0
47
+ networkx==3.1
48
+ nltk==3.8.1
49
+ numpy==1.24.4
50
+ nvidia-cublas-cu12==12.1.3.1
51
+ nvidia-cuda-cupti-cu12==12.1.105
52
+ nvidia-cuda-nvrtc-cu12==12.1.105
53
+ nvidia-cuda-runtime-cu12==12.1.105
54
+ nvidia-cudnn-cu12==9.1.0.70
55
+ nvidia-cufft-cu12==11.0.2.54
56
+ nvidia-curand-cu12==10.3.2.106
57
+ nvidia-cusolver-cu12==11.4.5.107
58
+ nvidia-cusparse-cu12==12.1.0.106
59
+ nvidia-nccl-cu12==2.20.5
60
+ nvidia-nvjitlink-cu12==12.5.82
61
+ nvidia-nvtx-cu12==12.1.105
62
+ orjson==3.10.6
63
+ packaging==24.1
64
+ pillow==10.4.0
65
+ pinecone==4.0.0
66
+ pinecone-text==0.9.0
67
+ pydantic==2.8.2
68
+ pydantic-core==2.20.1
69
+ python-dotenv==1.0.1
70
+ python-engineio==4.9.1
71
+ python-socketio==5.11.3
72
+ PyYAML==6.0.1
73
+ regex==2024.7.24
74
+ requests==2.32.3
75
+ safetensors==0.4.3
76
+ scikit-learn==1.3.2
77
+ scipy==1.10.1
78
+ sentence-transformers==3.0.1
79
+ simple-websocket==1.0.0
80
+ sniffio==1.3.1
81
+ SQLAlchemy==2.0.31
82
+ sympy==1.13.1
83
+ tenacity==8.5.0
84
+ threadpoolctl==3.5.0
85
+ tokenizers==0.19.1
86
+ torch==2.4.0
87
+ tqdm==4.66.4
88
+ transformers==4.43.3
89
+ triton==3.0.0
90
+ types-requests==2.32.0.20240712
91
+ typing-extensions==4.12.2
92
+ typing-inspect==0.9.0
93
+ urllib3==2.2.2
94
+ werkzeug==3.0.3
95
+ wget==3.2
96
+ wsproto==1.2.0
97
+ yarl==1.9.4
98
+ zipp==3.19.2
99
+ gunicorn