Chris4K committed
Commit 3e95a64
1 parent: 969689a

Update app.py

Files changed (1):
  app.py +3 -235
app.py CHANGED
@@ -105,135 +105,6 @@ def load_model( ):
 
     return model
 
-##################################################
-## vs chat
-##################################################
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
-
-from langchain_core.messages import AIMessage, HumanMessage
-from langchain_community.document_loaders import WebBaseLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-
-from langchain_community.embeddings import HuggingFaceBgeEmbeddings
-from langchain.vectorstores.faiss import FAISS
-
-
-from dotenv import load_dotenv
-from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-from langchain.chains import create_history_aware_retriever, create_retrieval_chain
-from langchain.chains.combine_documents import create_stuff_documents_chain
-
-
-load_dotenv()
-
-def get_vectorstore():
-    '''
-    Load the persisted Chroma vector store holding the embeddings of the text chunks.
-    '''
-    model = "BAAI/bge-base-en-v1.5"
-    encode_kwargs = {
-        "normalize_embeddings": True
-    }  # set True to compute cosine similarity
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
-    )
-    # load from disk
-    vector_store = Chroma(persist_directory="/home/user/.cache/chroma_db", embedding_function=embeddings)
-    return vector_store
-
-def get_vectorstore_from_url(url):
-    # get the text in document form
-    loader = WebBaseLoader(url)
-    document = loader.load()
-
-    # split the document into chunks
-    text_splitter = RecursiveCharacterTextSplitter()
-    document_chunks = text_splitter.split_documents(document)
-    #######
-    '''
-    Build a Chroma vector store holding the embeddings of the text chunks.
-    '''
-    model = "BAAI/bge-base-en-v1.5"
-    encode_kwargs = {
-        "normalize_embeddings": True
-    }  # set True to compute cosine similarity
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
-    )
-    # load from disk
-    #vector_store = Chroma(persist_directory="/home/user/.cache/chroma_db", embedding_function=embeddings)
-
-    #vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-    vector_store = Chroma.from_documents(document_chunks, embeddings, persist_directory="/home/user/.cache/chroma_db")
-
-    all_documents = vector_store.get()['documents']
-    total_records = len(all_documents)
-    print("Total records in the collection: ", total_records)
-
-    return vector_store
-
-def get_context_retriever_chain(vector_store):
-
-    llm = load_model( )
-
-    retriever = vector_store.as_retriever()
-
-    prompt = ChatPromptTemplate.from_messages([
-        MessagesPlaceholder(variable_name="chat_history"),
-        ("user", "{input}"),
-        ("user", "Given the above conversation, generate a search query to look up in order to get information relevant to the conversation")
-    ])
-
-    retriever_chain = create_history_aware_retriever(llm, retriever, prompt)
-
-    return retriever_chain
-
-def get_conversational_rag_chain(retriever_chain):
-
-    llm = load_model( )
-
-    prompt = ChatPromptTemplate.from_messages([
-        ("system", "You are a friendly employee named Susie working in a call center. You answer based on the context; use only its content and add the source where possible. If the answer cannot be derived from the context, reply: I am not sure. Please answer in German. CONTEXT:\n\n{context}"),
-        MessagesPlaceholder(variable_name="chat_history"),
-        ("user", "{input}"),
-    ])
-
-    stuff_documents_chain = create_stuff_documents_chain(llm, prompt)
-
-    return create_retrieval_chain(retriever_chain, stuff_documents_chain)
-
-
-###################
-
-###################
-import gradio as gr
-
-
-chat_history = []  # Set your chat history here
-
-# Define your function here
-def get_response(user_input):
-
-    vs = get_vectorstore()
-    chat_history = []
-    retriever_chain = get_context_retriever_chain(vs)
-    conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
-
-    response = conversation_rag_chain.invoke({
-        "chat_history": chat_history,
-        "input": user_input
-    })
-    #print("get_response " + response)
-    res = response['answer']
-    parts = res.split(" Assistant: ")
-    last_part = parts[-1]
-    return last_part
-
-
 ###############
 #####
 #####
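For orientation, the removed functions composed into a simple ingest-then-ask round trip: `get_vectorstore_from_url` scraped, chunked, embedded, and persisted a page to Chroma, and `get_vectorstore` reloaded the same store for querying. A minimal sketch of that usage (the URL is a placeholder; this call sequence is inferred from the removed `get_response`, not part of the commit):

```python
# Hypothetical usage of the removed pipeline (URL is a placeholder).
vs = get_vectorstore_from_url("https://example.com/docs")  # scrape, chunk, embed, persist
vs = get_vectorstore()                                     # reload the persisted Chroma store

retriever_chain = get_context_retriever_chain(vs)
rag_chain = get_conversational_rag_chain(retriever_chain)
response = rag_chain.invoke({"chat_history": [], "input": "What does the page cover?"})
print(response["answer"])
```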
@@ -256,105 +127,10 @@ app.add_middleware(
 
 @app.post("/generate/")
 def generate(user_input, history=[]):
-    print("----yuhu -----")
-    return get_response(user_input, history)
-##################
-
-def history_to_dialog_format(chat_history: list[str]):
-    dialog = []
-    if len(chat_history) > 0:
-        for idx, message in enumerate(chat_history[0]):
-            role = "user" if idx % 2 == 0 else "assistant"
-            dialog.append({
-                "role": role,
-                "content": message,
-            })
-    return dialog
-
-def get_response(message, history):
-    dialog = history_to_dialog_format(history)
-    dialog.append({"role": "user", "content": message})
-
-    # Define the prompt as a ChatPromptValue object
-    #user_input = ChatPromptValue(user_input)
-
-    # Convert the prompt to a tensor
-    #input_ids = user_input.tensor
-
-
-    #vs = get_vectorstore_from_url(user_url, all_domain)
-    vs = get_vectorstore()
-
-    history = []
-    retriever_chain = get_context_retriever_chain(vs)
-    conversation_rag_chain = get_conversational_rag_chain(retriever_chain)
-
-    response = conversation_rag_chain.invoke({
-        "chat_history": history,
-        "input": message + " Assistant: ",
-        "chat_message": message + " Assistant: "
-    })
-    #print("get_response " + response)
-    res = response['answer']
-    parts = res.split(" Assistant: ")
-    last_part = parts[-1]
-    return last_part  #[-1]['generation']['content']
-
-
-
-
-
-
-######
-
-########
-import requests
-from bs4 import BeautifulSoup
-from urllib.parse import urlparse, urljoin
-
-def get_links_from_page(url, visited_urls, domain_links):
-    if url in visited_urls:
-        return
-
-    if len(visited_urls) > 25:
-        return
-
-    visited_urls.add(url)
-    print(url)
-    response = requests.get(url)
-
-    if response.status_code == 200:
-        soup = BeautifulSoup(response.content, 'html.parser')
-        base_url = urlparse(url).scheme + '://' + urlparse(url).netloc
-        links = soup.find_all('a', href=True)
-
-        for link in links:
-            href = link.get('href')
-            absolute_url = urljoin(base_url, href)
-            parsed_url = urlparse(absolute_url)
-
-            if parsed_url.netloc == urlparse(url).netloc:
-                domain_links.add(absolute_url)
-                get_links_from_page(absolute_url, visited_urls, domain_links)
-
-    else:
-        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
-
-def get_all_links_from_domain(domain_url):
-    visited_urls = set()
-    domain_links = set()
-    get_links_from_page(domain_url, visited_urls, domain_links)
-    return domain_links
-
-
-
-
-
-def simple(text: str):
-    return text + " hhhmmm "
+    return text + " hhhmmm "
 
 fe_app = gr.ChatInterface(
-    fn=get_response,
+    fn=generate,
     #fn=simple,
     # inputs=["text"],
     # outputs="text",
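The removed crawler recurses once per discovered link, so crawl depth translates directly into Python call-stack depth. An iterative variant with an explicit frontier queue avoids that; the following is a sketch using the same requests/BeautifulSoup calls, not the committed code:

```python
# Iterative alternative to the removed recursive crawler (a sketch, not the
# committed code): an explicit frontier queue replaces recursion, so large
# sites cannot exhaust the call stack.
from collections import deque
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

def get_all_links_from_domain_iterative(domain_url, max_pages=25):
    visited, domain_links = set(), set()
    frontier = deque([domain_url])
    netloc = urlparse(domain_url).netloc
    while frontier and len(visited) < max_pages:
        url = frontier.popleft()
        if url in visited:
            continue
        visited.add(url)
        response = requests.get(url)
        if response.status_code != 200:
            continue
        soup = BeautifulSoup(response.content, "html.parser")
        for link in soup.find_all("a", href=True):
            absolute_url = urljoin(url, link["href"])
            # Stay on the starting domain; enqueue unseen links.
            if urlparse(absolute_url).netloc == netloc and absolute_url not in visited:
                domain_links.add(absolute_url)
                frontier.append(absolute_url)
    return domain_links
```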
@@ -378,12 +154,4 @@ async def startup():
 
     print("######################")
     print(result)
-    #domain_url = 'https://globl.contact/'
-    #links = get_all_links_from_domain(domain_url)
-    #print("Links from the domain:", links)
-
-    #########
-    # Assuming visited_urls is a list of URLs
-    #for url in links:
-    #    vs = get_vectorstore_from_url(url)
-    #load_model()
+
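One caveat in the surviving code: the new `/generate/` handler returns `text`, a name that is not defined in its scope (the body matches the removed `simple(text)` helper), so any request would raise a `NameError`. A minimal sketch of the presumable intent, assuming the stub should echo `user_input`:

```python
@app.post("/generate/")
def generate(user_input, history=[]):
    # Echo stub: 'user_input' stands in for the undefined 'text'
    # that the committed version copied from simple().
    return user_input + " hhhmmm "
```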