Spaces:
Sleeping
Sleeping
File size: 6,614 Bytes
40cdb6a b4c9a1a 40cdb6a acdfd32 5fb9ac1 3de91d2 944d0e1 3de91d2 944d0e1 2573026 80b6833 1a58e38 b0c2444 1bf8b22 569761e 40cdb6a 569761e 40cdb6a b0c2444 569761e b4c9a1a 40cdb6a 7c28f5b 40cdb6a 1bf8b22 569761e 40cdb6a 1ac8142 c5241c7 2a58aaa b0c2444 acdfd32 40cdb6a b0c2444 40cdb6a 569761e 40cdb6a 1bf8b22 40cdb6a 1ac8142 40cdb6a 3de91d2 944d0e1 823bb76 944d0e1 2573026 1a58e38 405ecf5 2c97b5a ef56249 2c97b5a ef56249 2c97b5a 1a58e38 80b6833 1a58e38 80b6833 944d0e1 bbba288 cd99b75 40cdb6a 08e79a1 efdfa77 569761e efdfa77 40cdb6a b4c9a1a 821206f b4c9a1a 821206f 39b8141 40cdb6a b4c9a1a 40cdb6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
from typing import Callable, Optional
import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Zilliz
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
import uuid
from project.llm.zhipuai_llm import ZhipuAILLM
from project.prompt.answer_by_private_prompt import (
COMBINE_PROMPT,
EXAMPLE_PROMPT,
QUESTION_PROMPT,
DEFAULT_TEXT_QA_PROMPT,
DEFAULT_REFINE_PROMPT
)
from langchain.chains.combine_documents.refine import RefineDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import StuffDocumentsChain
from langchain_core.prompts import PromptTemplate
import hashlib
from project.embeddings.zhipuai_embedding import ZhipuAIEmbeddings
import os
# Global QA chain shared between the Gradio callbacks.
# It stays None until web_loader() successfully builds it; query() checks this.
chain: Optional[Callable] = None
# Zilliz/Milvus connection settings and the ZhipuAI API key, read from the
# environment at import time (expected to be configured on the Space/host).
db_host = os.getenv("DB_HOST")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")
zhipuai_api_key = os.getenv("ZHIPU_AI_KEY")
def generate_article_id(content):
    """Return a deterministic ID for *content*.

    The ID is the SHA-256 hex digest of the UTF-8 encoded text, so identical
    chunks always map to the same ID.
    """
    return hashlib.sha256(content.encode('utf-8')).hexdigest()
def web_loader(file):
    """Index an uploaded text file into Zilliz and build the global QA chain.

    The file is split into 512-character chunks, embedded with ZhipuAI
    embeddings, and upserted into the Zilliz collection using each chunk's
    SHA-256 content hash as its ID (so re-loading identical content is
    idempotent).  A stuff-documents RetrievalQAWithSourcesChain is then
    stored in the module-level ``chain`` for query() to use.

    Args:
        file: Path to the uploaded file (as provided by gr.File), or falsy
            when nothing was uploaded.

    Returns:
        A short status string displayed in the Gradio UI.
    """
    if not file:
        return "please upload file"
    loader = TextLoader(file)
    docs = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=0)
    docs = text_splitter.split_documents(docs)
    embeddings = ZhipuAIEmbeddings(zhipuai_api_key=zhipuai_api_key)
    if not embeddings:
        return "embeddings not"
    # Content-hash IDs: identical chunks collapse to the same vector-store
    # entry instead of accumulating duplicates across reloads.
    article_ids = [generate_article_id(d.page_content) for d in docs]
    docsearch = Zilliz.from_documents(
        docs,
        embedding=embeddings,
        ids=article_ids,
        connection_args={
            "uri": db_host,
            "user": db_user,
            "password": db_password,
            "secure": True,
        },
        collection_name="LangChainCollectionYin",
    )
    if not docsearch:
        return "docsearch not"

    global chain
    llm = ZhipuAILLM(model="glm-3-turbo", temperature=0.1, zhipuai_api_key=zhipuai_api_key)
    # Each retrieved document is rendered verbatim into the prompt's {context}.
    document_prompt = PromptTemplate(
        input_variables=["page_content"],
        template="{page_content}"
    )
    document_variable_name = "context"
    # The prompt must expose `document_variable_name` ("context") as an input
    # variable; the template text is user-facing Chinese and left unchanged.
    prompt = PromptTemplate.from_template(
        """你是资深的技术支持工程师,请使用提供给你的文档内容去恢复客户问题,不需要编造或者虚构答案,也不需要回答文档之外的内容。
请用中文回答。
下边是我给你提供的文档,其中文档格式都是一问一答,不允许组装多个答案回答一个问题,并且问题答案也完全来自所提供的回答:
{context}
问题: {question}
答:"""
    )
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=llm_chain,
        document_prompt=document_prompt,
        document_variable_name=document_variable_name,
    )
    # Retrieve the top-3 most similar chunks per question.
    chain = RetrievalQAWithSourcesChain(
        combine_documents_chain=combine_documents_chain,
        retriever=docsearch.as_retriever(search_kwargs={'k': 3}),
    )
    return "success to load data"
def query(question):
    """Answer *question* with the global retrieval QA chain.

    Returns a load-first hint when no chain has been built yet, otherwise
    the chain's "answer" output (or a fallback message when absent).
    """
    global chain
    if not chain:
        return "please load the data first"
    result = chain(inputs={"question": question}, return_only_outputs=True)
    return result.get("answer", "fail to get answer")
if __name__ == "__main__":
    # Gradio app: upload a knowledge-base file, index it into Zilliz via
    # web_loader(), then ask questions answered by the global QA chain.
    block = gr.Blocks()
    with block as demo:
        gr.Markdown(
            """
<h1><center>Langchain And Zilliz App</center></h1>
v.2.28.15.3
"""
        )
        # url_list_text = gr.Textbox(
        #     label="url list",
        #     lines=3,
        #     placeholder="https://milvus.io/docs/overview.md",
        # )
        # Upload widget; the label is user-facing Chinese text.  NOTE(review):
        # the label promises .txt/.md/.docx/.pdf support but web_loader uses
        # TextLoader only — non-text formats likely fail; confirm upstream.
        file = gr.File(label='请上传知识库文件\n可以处理 .txt, .md, .docx, .pdf 结尾的文件',
                       file_types=['.txt', '.md', '.docx', '.pdf'])
        # openai_key_text = gr.Textbox(label="openai api key", type="password", placeholder="sk-******")
        # puzhiai_key_text = gr.Textbox(label="puzhi api key", type="password", placeholder="******")
        loader_output = gr.Textbox(label="load status")
        loader_btn = gr.Button("Load Data")
        # "Load Data" runs web_loader on the uploaded file and shows its status.
        loader_btn.click(
            fn=web_loader,
            inputs=[
                file,
            ],
            outputs=loader_output,
            api_name="web_load",
        )
        question_text = gr.Textbox(
            label="question",
            lines=3,
            placeholder="What is milvus?",
        )
        query_output = gr.Textbox(label="question answer", lines=3)
        query_btn = gr.Button("Generate")
        # "Generate" answers the typed question through query().
        query_btn.click(
            fn=query,
            inputs=[question_text],
            outputs=query_output,
            api_name="generate_answer",
        )
    # Serve on all interfaces (container-friendly); queue() enables request queuing.
    demo.queue().launch(server_name="0.0.0.0", share=False)