"""Gradio chat front-end answering questions from a Pinecone-backed index.

Each user message is used to retrieve similar documents from an existing
Pinecone index; the retrieved documents and the question are then passed to
a "stuff" question-answering chain driven by an OpenAI chat model.
"""

import os
import time

import gradio as gr
import langchain
import pinecone
from langchain.cache import InMemoryCache
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.schema import AIMessage, HumanMessage
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS, Chroma, Pinecone

# Credentials and login data are taken from the environment.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# NOTE(review): `USER` is commonly pre-set by the shell to the login name;
# confirm the deployment really overrides it with the intended UI account.
USER = os.getenv('USER')
PASS = os.getenv('PASS')

# Delay between streamed characters, in seconds (produces a typing effect).
_STREAM_DELAY_SECONDS = 0.1

# Initialize Pinecone.
pinecone.init(
    api_key=os.getenv('pinecone_api_key'),
    environment="gcp-starter",
)
index_name = "text-index"

embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo", verbose=True)

# Load data: attach to the already-populated index instead of re-ingesting.
docsearch = Pinecone.from_existing_index(index_name, embeddings)
chain = load_qa_chain(llm, chain_type="stuff")


def predict(message, history):
    """Answer ``message`` from the indexed documents, streaming the reply.

    Args:
        message: The question typed by the user.
        history: Previous (user, assistant) turns supplied by
            ``gr.ChatInterface``. It is accepted to satisfy the callback
            signature but is not used: every question is answered
            independently of earlier turns.

    Yields:
        Progressively longer prefixes of the answer, one additional
        character per step, so the UI can render the reply incrementally.
    """
    docs = docsearch.similarity_search(message)
    response = chain.run(input_documents=docs, question=message)

    # The chain returns the full answer at once; emit it character by
    # character with a short pause between steps.
    for end in range(1, len(response) + 1):
        time.sleep(_STREAM_DELAY_SECONDS)
        yield response[:end]


# Install a process-wide in-memory LLM cache before serving requests.
langchain.llm_cache = InMemoryCache()

gr.ChatInterface(
    predict,
    theme=gr.themes.Default(),
    textbox=gr.Textbox(placeholder="请输入您的问题...", container=False, scale=7),
    title="欢迎使用智造云AI助手",
    examples=["老师反馈文件传输慢怎么处理?", "用户作业同步状态速度为0应该联系谁?"],
).queue().launch(debug=True, auth=(USER, PASS))