Spaces:
Runtime error
Runtime error
Commit
•
b4d6f98
1
Parent(s):
82a6d63
Update custom_llm.py
Browse files — custom_llm.py: +29 −2
custom_llm.py
CHANGED
@@ -16,21 +16,48 @@ from langchain_core.messages import AIMessage, HumanMessage
|
|
16 |
from langchain_community.document_loaders import DirectoryLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain_community.document_loaders import PyMuPDFLoader
|
19 |
-
import os
|
20 |
from langchain.embeddings import HuggingFaceEmbeddings
|
21 |
from langchain.vectorstores import FAISS
|
22 |
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def create_vectorstore():
    """Build a FAISS vector store from the knowledge-base text.

    The raw text is read from the ``knowledge_base`` environment variable,
    split into overlapping chunks, embedded with a multilingual
    sentence-transformers model, and indexed in FAISS.

    Returns:
        FAISS: vector store over the chunked knowledge-base text.

    Raises:
        RuntimeError: if the ``knowledge_base`` environment variable is
            unset or empty.
    """
    # os.getenv returns None when the variable is missing; letting that
    # flow into create_documents([None]) raises an opaque TypeError, so
    # fail fast with an actionable message instead.
    knowledge_text = os.getenv('knowledge_base')
    if not knowledge_text:
        raise RuntimeError("environment variable 'knowledge_base' is not set")

    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=20)
    docs = splitter.create_documents([knowledge_text])

    # normalize_embeddings=True so inner-product search behaves like cosine.
    emb_model = HuggingFaceEmbeddings(
        model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
        encode_kwargs={'normalize_embeddings': True},
    )
    db = FAISS.from_documents(docs, emb_model)
    return db
|
|
|
34 |
|
35 |
def custom_chain_with_history(llm, memory):
|
36 |
|
|
|
16 |
from langchain_community.document_loaders import DirectoryLoader
|
17 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
18 |
from langchain_community.document_loaders import PyMuPDFLoader
|
19 |
+
import os, requests, bs4
|
20 |
from langchain.embeddings import HuggingFaceEmbeddings
|
21 |
from langchain.vectorstores import FAISS
|
22 |
|
23 |
|
24 |
+
def load_web(web_url):
    """Fetch *web_url* and return the de-duplicated text of its ``<div>`` tags.

    Because nested ``<div>`` elements repeat text (a parent's ``.text``
    contains its children's), duplicates are collapsed: for each distinct
    stripped text, the longest variant seen is kept.

    Args:
        web_url (str): URL of the page to scrape.

    Returns:
        list[str]: unique, non-empty div texts from the page.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    # Bound the request and surface HTTP errors instead of silently
    # parsing an error page into the knowledge base.
    response = requests.get(web_url, timeout=30)
    response.raise_for_status()
    soup = bs4.BeautifulSoup(response.content, "html.parser")

    input_list = [div.text.strip() for div in soup.find_all("div") if div.text.strip() != '']

    # Map stripped text -> longest original string carrying that text.
    unique_strings = {}
    for item in input_list:
        cleaned_item = item.strip()
        if cleaned_item not in unique_strings or len(item) > len(unique_strings[cleaned_item]):
            unique_strings[cleaned_item] = item

    return list(unique_strings.values())
|
47 |
+
|
48 |
+
|
49 |
def create_vectorstore(web_url="https://lintasmediadanawa.com"):
    """Build a FAISS vector store from the env knowledge base plus a website.

    Combines the text in the ``knowledge_base`` environment variable (if
    set) with the de-duplicated div texts scraped from *web_url*, chunks
    everything, embeds it with a multilingual sentence-transformers model,
    and indexes the chunks in FAISS.

    Args:
        web_url (str): site to scrape for additional knowledge-base text.
            Defaults to the previously hard-coded company site, so existing
            callers are unaffected.

    Returns:
        FAISS: vector store over all chunked documents.

    Raises:
        RuntimeError: if no source text is available at all.
    """
    web_texts = load_web(web_url)

    # os.getenv returns None when the variable is missing; the original
    # code would then pass [None] into create_documents and crash. Treat
    # a missing variable as "no extra text" instead.
    base_text = os.getenv('knowledge_base')
    source_texts = ([base_text] if base_text else []) + web_texts
    if not source_texts:
        raise RuntimeError("no knowledge-base text available (env var unset and website empty)")

    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=20)
    docs = splitter.create_documents(source_texts)

    # normalize_embeddings=True so inner-product search behaves like cosine.
    emb_model = HuggingFaceEmbeddings(
        model_name='sentence-transformers/paraphrase-multilingual-mpnet-base-v2',
        encode_kwargs={'normalize_embeddings': True},
    )
    db = FAISS.from_documents(docs, emb_model)
    return db
|
60 |
+
|
61 |
|
62 |
def custom_chain_with_history(llm, memory):
|
63 |
|