guangliang.yin committed on
Commit
b0c2444
1 Parent(s): 11b895a

文章id优化

Browse files
Files changed (1) hide show
  1. app.py +21 -1
app.py CHANGED
@@ -20,10 +20,24 @@ from langchain.chains.llm import LLMChain
20
  from langchain.chains.combine_documents import create_stuff_documents_chain
21
  from langchain.chains import StuffDocumentsChain
22
  from langchain_core.prompts import PromptTemplate
 
23
 
24
  chain: Optional[Callable] = None
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
28
  if not file:
29
  return "please upload file"
@@ -38,11 +52,17 @@ def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
38
  return "embeddings not"
39
 
40
  texts = [d.page_content for d in docs]
 
 
 
 
 
 
41
 
42
  docsearch = Zilliz.from_documents(
43
  docs,
44
  embedding=embeddings,
45
- ids=[str(uuid.uuid4()) for _ in range(len(texts))],
46
  connection_args={
47
  "uri": zilliz_uri,
48
  "user": user,
 
20
  from langchain.chains.combine_documents import create_stuff_documents_chain
21
  from langchain.chains import StuffDocumentsChain
22
  from langchain_core.prompts import PromptTemplate
23
+ import hashlib
24
 
25
  chain: Optional[Callable] = None
26
 
27
 
28
+ def generate_article_id(content):
29
+ # Derive the article ID from the content itself with SHA-256, so identical content always yields the same ID
30
+ sha256 = hashlib.sha256()
31
+
32
+ # Encode the article content as UTF-8 bytes and feed it into the hash object
33
+ sha256.update(content.encode('utf-8'))
34
+
35
+ # Take the hexadecimal string representation of the digest (64 hex characters for SHA-256)
36
+ article_id = sha256.hexdigest()
37
+
38
+ return article_id
39
+
40
+
41
  def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
42
  if not file:
43
  return "please upload file"
 
52
  return "embeddings not"
53
 
54
  texts = [d.page_content for d in docs]
55
+ article_ids = []
56
+ # 遍历texts列表
57
+ for text in texts:
58
+ # 使用generate_article_id函数生成文章ID,并将其添加到article_ids列表中
59
+ article_id = generate_article_id(text)
60
+ article_ids.append(article_id)
61
 
62
  docsearch = Zilliz.from_documents(
63
  docs,
64
  embedding=embeddings,
65
+ ids=article_ids,
66
  connection_args={
67
  "uri": zilliz_uri,
68
  "user": user,