Spaces:

littlezebra
/

hellow-langChain

Sleeping

File size: 5,172 Bytes

40cdb6a
 
 
 
 
b4c9a1a
40cdb6a
 
acdfd32
5fb9ac1
40cdb6a
 
 
 
b4c9a1a
 
 
 
40cdb6a
 
 
 
d91fac5
40cdb6a
1ac8142
 
c5241c7
2a58aaa
acdfd32
40cdb6a
 
 
a4d283e
40cdb6a
 
 
 
 
 
 
 
1ac8142
 
 
40cdb6a
 
6978c3c
40cdb6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efdfa77
 
 
40cdb6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b4c9a1a
 
 
 
 
 
 
40cdb6a
6978c3c
40cdb6a
 
 
 
 
 
 
 
 
 
 
 
 
 
b4c9a1a
40cdb6a
6978c3c
40cdb6a

from typing import Callable, Optional

import gradio as gr
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Zilliz
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
import uuid
from project.llm.zhipuai_llm import ZhipuAILLM

# Question-answering chain shared by the two Gradio callbacks: web_loader()
# assigns it after indexing a file, query() calls it. It stays None until a
# load succeeds. NOTE(review): this is a single module-level value, so every
# browser session served by this process sees the most recently loaded data.
chain: Optional[Callable] = None


def web_loader(file, openai_key, puzhiai_key, zilliz_uri, user, password):
    """Index an uploaded text file in Zilliz Cloud and build the QA chain.

    The file is read with ``TextLoader``, split with ``CharacterTextSplitter``
    (``chunk_size=1024``, no overlap), embedded with the OpenAI
    ``text-embedding-ada-002`` model and stored through the ``Zilliz`` vector
    store. On success the module-level ``chain`` is replaced with a
    ``RetrievalQAWithSourcesChain`` (``map_reduce``) that answers with a
    ZhipuAI ``glm-3-turbo`` model over that store.

    Args:
        file: Upload handed over by the Gradio file component. Either a path
            string or an object exposing the path as ``.name``.
        openai_key: OpenAI API key used for the embeddings.
        puzhiai_key: ZhipuAI API key used for the answering LLM.
        zilliz_uri: Zilliz Cloud connection URI.
        user: Zilliz Cloud user name.
        password: Zilliz Cloud password.

    Returns:
        A status string for the "load status" box: ``"success to load data"``
        on success, otherwise a short message naming what is missing.

    Errors raised by the loader, the embedding service or the vector store are
    not caught here and propagate to the caller.
    """
    if not file:
        return "please upload file"

    # Depending on the Gradio version / ``type`` setting the upload arrives as
    # a plain path or as a temp-file wrapper; TextLoader needs the path itself.
    file_path = getattr(file, "name", file)
    docs = TextLoader(file_path).load()

    text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
    docs = text_splitter.split_documents(docs)
    if not docs:
        # Nothing to embed or store; do not report success for an empty index.
        return "no content found in file"

    embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=openai_key)

    # Defensive check kept from the original flow.
    if not embeddings:
        return "embeddings not"

    docsearch = Zilliz.from_documents(
        docs,
        embedding=embeddings,
        # One fresh primary key per chunk.
        ids=[str(uuid.uuid4()) for _ in docs],
        connection_args={
            "uri": zilliz_uri,
            "user": user,
            "password": password,
            "secure": True,
        },
    )

    # Defensive check kept from the original flow.
    if not docsearch:
        return "docsearch not"

    global chain
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ZhipuAILLM(model="glm-3-turbo", temperature=0.1, zhipuai_api_key=puzhiai_key),
        chain_type="map_reduce",
        retriever=docsearch.as_retriever(),
    )
    return "success to load data"


def query(question):
    """Answer a question with the chain built by the last successful load.

    Args:
        question: The user's question text.

    Returns:
        The chain's ``"answer"`` value, or a short status string when no data
        has been loaded yet, the question is blank, or the chain result has no
        ``"answer"`` key.
    """
    # The chain only exists after a successful load.
    if chain is None:
        return "please load the data first"
    # Do not spend a retrieval + LLM round trip on an empty prompt.
    if not question or not question.strip():
        return "please input a question"
    # Example question: "What is milvus?"
    result = chain(inputs={"question": question}, return_only_outputs=True)
    return result.get("answer", "fail to get answer")


if __name__ == "__main__":
    # Build the Gradio page: instructions, credential inputs, a "Load Data"
    # action wired to web_loader() and a "Generate" action wired to query().
    # Components are created in the order they appear on the page.
    with gr.Blocks() as demo:
        gr.Markdown(
            """
        <h1><center>Langchain And Zilliz Cloud Example</center></h1>
        This is how to use Zilliz Cloud as vector store in LangChain.
        The purpose of this example is to allow you to upload a knowledge base text file and then ask questions about its content.

        v.2.26.19.52

        ## 📋 Prerequisite:

        1. 🔑 To obtain an OpenAI key, please visit https://platform.openai.com/account/api-keys.
        2. 🔑 Obtain a ZhipuAI API key for the chat model that generates the answers.
        3. 💻 Create a Zilliz Cloud account to get free credits for usage by visiting https://cloud.zilliz.com.
        4. 🗄️ Create a database in Zilliz Cloud.

        ## 📝 Steps for usage:

        1. 🖋️ Upload a plain text knowledge base file (.txt or .md) in the file upload box.
        2. 🔑 Fill in the OpenAI API key in the openai api key input box.
        3. 🔑 Fill in the ZhipuAI API key in the zhipuai api key input box.
        4. 🌩️ Fill in the Zilliz Cloud connection parameters, including the connection URL, corresponding username, and password.
        5. 🚀 Click the Load Data button to load the data. When the load status text box prompts that the data has been successfully loaded, proceed to the next step.
        6. ❓ In the question input box, enter the relevant question about the uploaded file.
        7. 🔍 Click the Generate button to search for the answer to the question. The final answer will be displayed in the question answer text box.
        """
        )
        # Only plain-text formats are offered: the upload is read with
        # TextLoader, which cannot parse binary formats such as .docx or .pdf.
        upload_file = gr.File(label='请上传知识库文件',
                              file_types=['.txt', '.md'])
        openai_key_text = gr.Textbox(label="openai api key", type="password", placeholder="sk-******")
        zhipuai_key_text = gr.Textbox(label="zhipuai api key", type="password", placeholder="******")
        with gr.Row():
            zilliz_uri_text = gr.Textbox(
                label="zilliz cloud uri",
                placeholder="https://<instance-id>.<cloud-region-id>.vectordb.zillizcloud.com:<port>",
            )
            user_text = gr.Textbox(label="username", placeholder="db_admin")
            password_text = gr.Textbox(
                label="password", type="password", placeholder="******"
            )
        loader_output = gr.Textbox(label="load status")
        loader_btn = gr.Button("Load Data")
        # Input order must match web_loader's positional parameters.
        loader_btn.click(
            fn=web_loader,
            inputs=[
                upload_file,
                openai_key_text,
                zhipuai_key_text,
                zilliz_uri_text,
                user_text,
                password_text,
            ],
            outputs=loader_output,
            api_name="web_load",
        )

        question_text = gr.Textbox(
            label="question",
            lines=3,
            placeholder="What is milvus?",
        )
        query_output = gr.Textbox(label="question answer", lines=3)
        query_btn = gr.Button("Generate")
        query_btn.click(
            fn=query,
            inputs=[question_text],
            outputs=query_output,
            api_name="generate_answer",
        )

    # Launch once the layout context is closed; listen on all interfaces and
    # do not create a public share link.
    demo.queue().launch(server_name="0.0.0.0", share=False)