Spaces:
Sleeping
Sleeping
guangliang.yin
committed on
Commit
•
b0c2444
1
Parent(s):
11b895a
文章id优化
Browse files
app.py
CHANGED
@@ -20,10 +20,24 @@ from langchain.chains.llm import LLMChain
|
|
20 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
21 |
from langchain.chains import StuffDocumentsChain
|
22 |
from langchain_core.prompts import PromptTemplate
|
|
|
23 |
|
24 |
chain: Optional[Callable] = None
|
25 |
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
28 |
if not file:
|
29 |
return "please upload file"
|
@@ -38,11 +52,17 @@ def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
|
38 |
return "embeddings not"
|
39 |
|
40 |
texts = [d.page_content for d in docs]
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
docsearch = Zilliz.from_documents(
|
43 |
docs,
|
44 |
embedding=embeddings,
|
45 |
-
ids=
|
46 |
connection_args={
|
47 |
"uri": zilliz_uri,
|
48 |
"user": user,
|
|
|
20 |
from langchain.chains.combine_documents import create_stuff_documents_chain
|
21 |
from langchain.chains import StuffDocumentsChain
|
22 |
from langchain_core.prompts import PromptTemplate
|
23 |
+
import hashlib
|
24 |
|
25 |
chain: Optional[Callable] = None
|
26 |
|
27 |
|
28 |
+
def generate_article_id(content):
    """Return a deterministic article ID derived from the article content.

    The ID is the hexadecimal SHA-256 digest of the content, so identical
    content always produces the same ID.

    Args:
        content: Article text as ``str`` (encoded as UTF-8 before hashing),
            or already-encoded ``bytes``/``bytearray`` (hashed as-is).

    Returns:
        The 64-character lowercase hexadecimal SHA-256 digest.

    Raises:
        TypeError: If ``content`` is neither ``str`` nor bytes-like.
    """
    # Text is encoded as UTF-8; bytes-like input is hashed unchanged, so a
    # caller holding the UTF-8 bytes gets the same ID as for the text itself.
    data = content.encode('utf-8') if isinstance(content, str) else content
    return hashlib.sha256(data).hexdigest()
|
39 |
+
|
40 |
+
|
41 |
def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
|
42 |
if not file:
|
43 |
return "please upload file"
|
|
|
52 |
return "embeddings not"
|
53 |
|
54 |
texts = [d.page_content for d in docs]
|
55 |
+
article_ids = []
|
56 |
+
# 遍历texts列表
|
57 |
+
for text in texts:
|
58 |
+
# 使用generate_article_id函数生成文章ID,并将其添加到article_ids列表中
|
59 |
+
article_id = generate_article_id(text)
|
60 |
+
article_ids.append(article_id)
|
61 |
|
62 |
docsearch = Zilliz.from_documents(
|
63 |
docs,
|
64 |
embedding=embeddings,
|
65 |
+
ids=article_ids,
|
66 |
connection_args={
|
67 |
"uri": zilliz_uri,
|
68 |
"user": user,
|