Update app.py
Browse files
app.py
CHANGED
@@ -140,7 +140,7 @@ def get_conversational_rag_chain(retriever_chain):
|
|
140 |
llm = load_model(model_name)
|
141 |
|
142 |
prompt = ChatPromptTemplate.from_messages([
|
143 |
-
("system", "Du bist
|
144 |
MessagesPlaceholder(variable_name="chat_history"),
|
145 |
("user", "{input}"),
|
146 |
])
|
@@ -237,11 +237,64 @@ def get_response(message, history):
|
|
237 |
|
238 |
|
239 |
#####
|
240 |
-
vs = get_vectorstore_from_url("https://
|
241 |
-
vs = get_vectorstore_from_url("https://www.bofrost.de/shop/kartoffelprodukte_5539/pommes-frites_5540/mikrowellen-pommes.html?position=7&clicked=")
|
242 |
-
vs = get_vectorstore_from_url("https://www.bofrost.de/shop/kartoffelprodukte_5539/pommes-frites_5540/backofen-knusper-frites-1200-g.html?position=1&clicked=search")
|
243 |
-
vs = get_vectorstore_from_url("https://www.bofrost.de/shop/laenderkueche_5573/asiatische-kueche_5574/chinesische-bratnudeln.html?emcs0=1&emcs1=Produktdetailseite&emcs2=00554&emcs3=01270&clicked=recommendation&position=2")
|
244 |
-
vs = get_vectorstore_from_url("https://www.bofrost.de/shop/fertige-gerichte_5507/pfannengerichte_5508/westfaelisches-gruenkohlgericht.html?emcs0=98&emcs1=Produktdetailseite&emcs2=00170&emcs3=00554&clicked=recommendation&position=1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
245 |
|
246 |
|
247 |
def simple(text:str):
|
|
|
140 |
llm = load_model(model_name)
|
141 |
|
142 |
prompt = ChatPromptTemplate.from_messages([
|
143 |
+
("system", "Du bist eine freundlicher Mitarbeiterin Namens Susie und arbeitest in einenm Call Center. Du beantwortest basierend auf dem Context. Benutze nur den Inhalt des Context. Antworte mit: Ich bin mir nicht sicher. Wenn die Antwort nicht aus dem Context hervorgeht. Antworte auf Deutsch, bitte? CONTEXT:\n\n{context}"),
|
144 |
MessagesPlaceholder(variable_name="chat_history"),
|
145 |
("user", "{input}"),
|
146 |
])
|
|
|
237 |
|
238 |
|
239 |
#####
|
240 |
+
vs = get_vectorstore_from_url("https://globl.contact/")
|
241 |
+
#vs = get_vectorstore_from_url("https://www.bofrost.de/shop/kartoffelprodukte_5539/pommes-frites_5540/mikrowellen-pommes.html?position=7&clicked=")
|
242 |
+
#vs = get_vectorstore_from_url("https://www.bofrost.de/shop/kartoffelprodukte_5539/pommes-frites_5540/backofen-knusper-frites-1200-g.html?position=1&clicked=search")
|
243 |
+
#vs = get_vectorstore_from_url("https://www.bofrost.de/shop/laenderkueche_5573/asiatische-kueche_5574/chinesische-bratnudeln.html?emcs0=1&emcs1=Produktdetailseite&emcs2=00554&emcs3=01270&clicked=recommendation&position=2")
|
244 |
+
#vs = get_vectorstore_from_url("https://www.bofrost.de/shop/fertige-gerichte_5507/pfannengerichte_5508/westfaelisches-gruenkohlgericht.html?emcs0=98&emcs1=Produktdetailseite&emcs2=00170&emcs3=00554&clicked=recommendation&position=1")
|
245 |
+
|
246 |
+
|
247 |
+
######
|
248 |
+
|
249 |
+
########
|
250 |
+
import requests
|
251 |
+
from bs4 import BeautifulSoup
|
252 |
+
from urllib.parse import urlparse, urljoin
|
253 |
+
|
254 |
+
def get_links_from_page(url, visited_urls, domain_links, *, max_visited=25, timeout=10):
    """Recursively collect links that stay on the same host as ``url``.

    The page at ``url`` is fetched, every ``<a href=...>`` is resolved to an
    absolute URL, and each link whose network location matches that of
    ``url`` is recorded in ``domain_links`` and then crawled in turn.

    Args:
        url: Page to fetch and scan.
        visited_urls: Set of URLs already fetched; mutated in place.
        domain_links: Set receiving every same-host link found; mutated in
            place.
        max_visited: Crawl budget. Crawling stops once ``visited_urls`` holds
            more than this many entries, so at most ``max_visited + 1`` pages
            are fetched.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None. Results are delivered through ``visited_urls`` and
        ``domain_links``.
    """
    if url in visited_urls:
        return

    if len(visited_urls) > max_visited:
        return

    visited_urls.add(url)
    print(url)

    # A single unreachable or slow page must not abort (or hang) the crawl.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        print(f"Failed to retrieve content from {url}. Error: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to retrieve content from {url}. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    page_netloc = urlparse(url).netloc

    for link in soup.find_all('a', href=True):
        # Resolve against the page URL (not just scheme://host) so relative
        # hrefs such as "details.html" keep the directory of the current page.
        # Drop the fragment: "#section" variants point at the same document
        # and would otherwise be fetched again as if they were new pages.
        parsed_url = urlparse(urljoin(url, link['href']))._replace(fragment='')

        if parsed_url.netloc != page_netloc:
            continue

        absolute_url = parsed_url.geturl()
        domain_links.add(absolute_url)
        get_links_from_page(
            absolute_url,
            visited_urls,
            domain_links,
            max_visited=max_visited,
            timeout=timeout,
        )
|
281 |
+
|
282 |
+
def get_all_links_from_domain(domain_url):
    """Crawl from ``domain_url`` and return the set of links collected.

    Starts a fresh crawl (empty visited set, empty result set) and hands both
    to ``get_links_from_page``, which fills them in place.
    """
    found_links = set()
    # The visited set is crawl bookkeeping only; callers get just the links.
    get_links_from_page(domain_url, set(), found_links)
    return found_links
|
287 |
+
|
288 |
+
# Example usage: crawl the site starting at its root and print every link found.
domain_url = 'https://globl.contact/'
links = get_all_links_from_domain(domain_url)
print("Links from the domain:", links)

#########
# `links` is the set returned by get_all_links_from_domain (not visited_urls).
# NOTE(review): `vs` is rebound on every iteration, so after the loop it holds
# only the result for the last URL iterated; since `links` is a set, which URL
# that is is not fixed. Confirm whether the per-page results were meant to be
# combined into one store.
for url in links:
    vs = get_vectorstore_from_url(url)
|
297 |
+
|
298 |
|
299 |
|
300 |
def simple(text:str):
|