"""Minimal RAG demo.

Embed a local text file with sentence-transformers, retrieve the chunks
most similar to a question via an in-memory FAISS index, and ask
ChatGLM-6B (int4) the question with the retrieved context prepended.
"""

import datetime

from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoModel, AutoTokenizer


def _build_vector_store(path: str) -> FAISS:
    """Load *path*, split it into overlapping chunks, and index the chunks
    in an in-memory FAISS store using mpnet sentence embeddings."""
    # Embedding model: CPU inference, raw (unnormalized) vectors.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-mpnet-base-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
    )
    # Read the source document(s) from disk.
    documents = TextLoader(path).load()
    # 100-char chunks with 50-char overlap so content straddling a chunk
    # boundary still appears intact in at least one chunk.
    splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=50)
    chunks = [
        chunk
        for doc in documents
        for chunk in splitter.split_text(doc.page_content)
    ]
    return FAISS.from_texts(chunks, embeddings)


def _build_prompt(store: FAISS, query: str, k: int = 4) -> str:
    """Return a prompt listing the *k* chunks most similar to *query*,
    numbered 1..k, followed by the question itself."""
    hits = store.similarity_search(query, k=k)
    # join() instead of repeated `+=`: linear, not quadratic, string build.
    context = ''.join(
        f'{rank}、{hit.page_content}\n' for rank, hit in enumerate(hits, start=1)
    )
    return f'已知:\n{context}\n问题:\n{query}'


def main() -> None:
    """Build the index, retrieve context for the demo question, and chat."""
    store = _build_vector_store("./test.txt")
    query = '为什么client不直接从data server拉取数据'
    prompt = _build_prompt(store, query)
    print(prompt)

    print(f'[{datetime.datetime.now()}]start to init model')
    # trust_remote_code is required by the ChatGLM checkpoint's custom
    # modeling code; .float() runs the int4-quantized weights in fp32 on CPU.
    tokenizer = AutoTokenizer.from_pretrained(
        "THUDM/chatglm-6b-int4", trust_remote_code=True
    )
    model = AutoModel.from_pretrained(
        "THUDM/chatglm-6b-int4", trust_remote_code=True
    ).float()
    model = model.eval()
    print(f'[{datetime.datetime.now()}]model ready, start to chat')
    response, history = model.chat(tokenizer, prompt, history=[])
    print(f'[{datetime.datetime.now()}]response:{response}')


if __name__ == "__main__":
    main()